Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -40,6 +40,9 @@ void addParameters(__isl_take isl_set *Context); void create(__isl_take isl_ast_node *Node); + /// @brief Preload all memory loads that are invariant. + void preloadInvariantLoads(); + /// @brief Finalize code generation for the SCoP @p S. /// /// @see BlockGenerator::finalizeSCoP(Scop &S) @@ -165,6 +168,22 @@ /// @param NewValues A map that maps certain llvm::Values to new llvm::Values. void updateValues(ParallelLoopGenerator::ValueToValueMapTy &NewValues); + /// @brief Preload the memory load access @p MA. + /// + /// If @p MA is not always executed it will be conditionally loaded and + /// merged with undef from the same type. Hence, if @p MA is executed only + /// under condition C then the preload code will look like this: + /// + /// MA_preload = undef; + /// if (C) + /// MA_preload = load MA; + /// use MA_preload + /// + /// Note that if the runtime context implies the condition C conditional + /// execution is not necessary. + Value *preloadInvariantLoad(MemoryAccess &MA, __isl_keep isl_ast_build *Build, + __isl_keep isl_set *RuntimeContext); + void createFor(__isl_take isl_ast_node *For); void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For); Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -89,6 +89,9 @@ /// @brief Destructor to free the isl id of the base pointer. ~ScopArrayInfo(); + /// @brief Set the base pointer to @p BP. + void setBasePtr(Value *BP) { BasePtr = BP; } + /// @brief Return the base pointer. Value *getBasePtr() const { return BasePtr; } @@ -275,6 +278,9 @@ /// Updated access relation read from JSCOP file. 
isl_map *newAccessRelation; + /// @brief Flag to indicate this is an invariant memory load. + bool IsInvariantLoad; + /// @brief A unique identifier for this memory access. /// /// The identifier is unique between all memory accesses belonging to the same @@ -364,6 +370,12 @@ /// @brief Check if a new access relation was imported or set by a pass. bool hasNewAccessRelation() const { return newAccessRelation; } + /// @brief Is this an invariant memory load? + bool isInvariantLoad() const { return IsInvariantLoad; } + + /// @brief Mark this as an invariant memory load. + void markAsInvariantLoad() { IsInvariantLoad = true; } + /// @brief Return the newest access relation of this access. /// /// There are two possibilities: @@ -915,8 +927,8 @@ /// @return True if the basic block is trivial, otherwise false. static bool isTrivialBB(BasicBlock *BB, TempScop &tempScop); - /// @brief Add parameter constraints to @p C that imply a non-empty domain. - __isl_give isl_set *addNonEmptyDomainConstraints(__isl_take isl_set *C) const; + /// @brief Identify and mark all invariant memory loads in the SCoP. + void markInvariantLoads(); /// @brief Build the Context of the Scop. void buildContext(); @@ -1056,6 +1068,9 @@ /// @brief Get the name of this Scop. std::string getNameStr() const; + /// @brief Add parameter constraints to @p C that imply a non-empty domain. + __isl_give isl_set *addNonEmptyDomainConstraints(__isl_take isl_set *C) const; + /// @brief Get the constraint on parameter of this Scop. /// /// @return The constraint on parameter of this Scop. Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -442,18 +442,6 @@ if (!isInvariant(*Operand, Reg)) return false; - // When the instruction is a load instruction, check that no write to memory - // in the region aliases with the load. 
- if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { - auto Loc = MemoryLocation::get(LI); - - // Check if any basic block in the region can modify the location pointed to - // by 'Loc'. If so, 'Val' is (likely) not invariant in the region. - for (const BasicBlock *BB : Reg.blocks()) - if (AA->canBasicBlockModify(*BB, Loc)) - return false; - } - return true; } Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -483,7 +483,7 @@ int Identifier) : AccType(getMemoryAccessType(Access)), Statement(Statement), AccessInstruction(AccInst), AccessValue(Access.getAccessValue()), - newAccessRelation(nullptr) { + newAccessRelation(nullptr), IsInvariantLoad(false) { isl_ctx *Ctx = Statement->getIslCtx(); BaseAddr = Access.getBase(); @@ -577,7 +577,11 @@ break; } OS << "[Reduction Type: " << getReductionType() << "] "; - OS << "[Scalar: " << isScalar() << "]\n"; + OS << "[Scalar: " << isScalar() << "] "; + if (isInvariantLoad()) + OS << "[Invariant]\n"; + else + OS << "\n"; OS.indent(16) << getOriginalAccessRelationStr() << ";\n"; } @@ -1576,6 +1580,8 @@ simplifyAssumedContext(); buildAliasChecks(AA); + markInvariantLoads(); + assert(NestLoops.empty() && "NestLoops not empty at top level!"); } @@ -1608,6 +1614,46 @@ } } +void Scop::markInvariantLoads() { + isl_union_map *Writes = getWrites(); + for (ScopStmt &Stmt : *this) { + + // Skip statements that are not surrounded by loops as we would only + // move the load to a prior location and not actually hoist it. 
+ if (Stmt.getNumIterators() == 0) + continue; + + isl_set *Domain = Stmt.getDomain(); + for (MemoryAccess *MA : Stmt) { + if (MA->isScalar() || MA->isWrite()) + continue; + + isl_map *AccessRelation = MA->getAccessRelation(); + if (isl_map_involves_dims(AccessRelation, isl_dim_in, 0, + Stmt.getNumIterators())) { + isl_map_free(AccessRelation); + continue; + } + + AccessRelation = + isl_map_intersect_domain(AccessRelation, isl_set_copy(Domain)); + isl_set *AccessRange = isl_map_range(AccessRelation); + + isl_union_map *Written = isl_union_map_intersect_range( + isl_union_map_copy(Writes), isl_union_set_from_set(AccessRange)); + bool IsWritten = !isl_union_map_is_empty(Written); + isl_union_map_free(Written); + + if (IsWritten) + continue; + + MA->markAsInvariantLoad(); + } + isl_set_free(Domain); + } + isl_union_map_free(Writes); +} + const ScopArrayInfo * Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *AccessType, const SmallVector<const SCEV *, 4> &Sizes, Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -220,6 +220,9 @@ ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT &LTS) { + if (Value *PreloadLoad = GlobalMap.lookup(Load)) + return PreloadLoad; + const Value *Pointer = Load->getPointerOperand(); Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap, GlobalMap, LTS); @@ -668,15 +671,23 @@ ValueMapT &BBMap) { const Value *Pointer = Load->getPointerOperand(); Type *VectorPtrType = getVectorPtrTy(Pointer, 1); - Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap, - GlobalMaps[0], VLTS[0]); - Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, - Load->getName() + "_p_vec_p"); - LoadInst *ScalarLoad = - Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one"); - if (!Aligned) - ScalarLoad->setAlignment(8); + Value *ScalarLoad = GlobalMaps[0].lookup(Load); + + if (ScalarLoad) { + 
ScalarLoad = Builder.CreateBitCast(ScalarLoad, + VectorPtrType->getPointerElementType()); + } else { + Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap, + GlobalMaps[0], VLTS[0]); + Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, + Load->getName() + "_p_vec_p"); + ScalarLoad = + Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one"); + + if (!Aligned) + cast<LoadInst>(ScalarLoad)->setAlignment(8); + } Constant *SplatVector = Constant::getNullValue( VectorType::get(Builder.getInt32Ty(), getVectorWidth())); Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -145,6 +145,7 @@ Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); SplitBlock->getTerminator()->setOperand(0, RTC); Builder.SetInsertPoint(StartBlock->begin()); + NodeBuilder.preloadInvariantLoads(); NodeBuilder.create(AstRoot); Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -154,6 +154,7 @@ Region &R; SetVector<Value *> &Values; SetVector<const SCEV *> &SCEVs; + ValueMapT &ValueMap; }; /// @brief Extract the values and SCEVs needed to generate code for a block. 
@@ -169,8 +170,9 @@ continue; } if (Instruction *OpInst = dyn_cast<Instruction>(SrcVal)) - if (Stmt->getParent()->getRegion().contains(OpInst)) - continue; + if (User.ValueMap.count(OpInst) == 0) + if (Stmt->getParent()->getRegion().contains(OpInst)) + continue; if (isa<Instruction>(SrcVal) || isa<Argument>(SrcVal)) User.Values.insert(SrcVal); @@ -208,7 +210,8 @@ SetVector<const Loop *> &Loops) { SetVector<const SCEV *> SCEVs; - struct FindValuesUser FindValues = {LI, SE, S.getRegion(), Values, SCEVs}; + struct FindValuesUser FindValues = {LI, SE, S.getRegion(), + Values, SCEVs, ValueMap}; for (const auto &I : IDToValue) Values.insert(I.second); @@ -712,6 +715,103 @@ llvm_unreachable("Unknown isl_ast_node type"); } +/// @brief Create the actual preload memory access for @p MA. +static inline Value *createPreloadLoad(Scop &S, MemoryAccess &MA, + isl_ast_build *Build, + IslExprBuilder &ExprBuilder) { + isl_set *AccessRange = isl_map_range(MA.getAccessRelation()); + isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); + PWAccRel = isl_pw_multi_aff_gist_params(PWAccRel, S.getContext()); + isl_ast_expr *Access = + isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); + return ExprBuilder.create(Access); +} + +Value *IslNodeBuilder::preloadInvariantLoad(MemoryAccess &MA, + isl_ast_build *Build, + isl_set *RuntimeContext) { + + isl_set *Domain = isl_set_params(MA.getStatement()->getDomain()); + + if (isl_set_is_subset(RuntimeContext, Domain)) { + isl_set_free(Domain); + return createPreloadLoad(S, MA, Build, ExprBuilder); + } else { + + isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); + + Value *Cond = ExprBuilder.create(DomainCond); + if (!Cond->getType()->isIntegerTy(1)) + Cond = Builder.CreateIsNotNull(Cond); + + BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), + Builder.GetInsertPoint(), &DT, &LI); + CondBB->setName("polly.inv.preload.cond"); + + BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), &DT, &LI); + MergeBB->setName("polly.inv.preload.merge"); + + Function *F = 
Builder.GetInsertBlock()->getParent(); + LLVMContext &Context = F->getContext(); + BasicBlock *ExecBB = + BasicBlock::Create(Context, "polly.inv.preload.exec", F); + + DT.addNewBlock(ExecBB, CondBB); + if (Loop *L = LI.getLoopFor(CondBB)) + L->addBasicBlockToLoop(ExecBB, LI); + + auto *CondBBTerminator = CondBB->getTerminator(); + Builder.SetInsertPoint(CondBBTerminator); + Builder.CreateCondBr(Cond, ExecBB, MergeBB); + CondBBTerminator->eraseFromParent(); + + Builder.SetInsertPoint(ExecBB); + Builder.CreateBr(MergeBB); + + Builder.SetInsertPoint(ExecBB->getTerminator()); + Instruction *AccInst = MA.getAccessInstruction(); + Type *AccInstTy = AccInst->getType(); + Value *PreAccInst = createPreloadLoad(S, MA, Build, ExprBuilder); + + Builder.SetInsertPoint(MergeBB->getTerminator()); + auto *MergePHI = Builder.CreatePHI( + AccInstTy, 2, "polly.inv.preload." + AccInst->getName() + ".merge"); + MergePHI->addIncoming(PreAccInst, ExecBB); + MergePHI->addIncoming(UndefValue::get(AccInstTy), CondBB); + + return MergePHI; + } +} + +void IslNodeBuilder::preloadInvariantLoads() { + + isl_ast_build *Build = + isl_ast_build_from_context(isl_set_universe(S.getParamSpace())); + isl_set *RuntimeContext = + S.addNonEmptyDomainConstraints(S.getRuntimeCheckContext()); + + for (ScopStmt &Stmt : S) { + for (MemoryAccess *MA : Stmt) { + if (!MA->isInvariantLoad()) + continue; + + Value *PreloadVal = preloadInvariantLoad(*MA, Build, RuntimeContext); + ValueMap[MA->getAccessInstruction()] = PreloadVal; + + auto *SAI = S.getScopArrayInfo(MA->getBaseAddr()); + // isl_id *DerivedSAIId = SAI->getBasePtrId(); + // IDToValue[DerivedSAIId] = PreloadVal; + // isl_id_free(DerivedSAIId); + for (auto *DerivedSAI : SAI->getDerivedSAIs()) { + DerivedSAI->setBasePtr(PreloadVal); + } + } + } + + isl_set_free(RuntimeContext); + isl_ast_build_free(Build); +} + void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { for (unsigned i = 0; i < isl_set_dim(Context, isl_dim_param); ++i) { Index: 
test/Isl/CodeGen/exprModDiv.ll =================================================================== --- test/Isl/CodeGen/exprModDiv.ll +++ test/Isl/CodeGen/exprModDiv.ll @@ -13,11 +13,19 @@ ; useful as LLVM will translate urem and udiv operations with power-of-two ; denominators to fast bitwise and or shift operations. +; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d +; A[p + 127 * floord(-p - 1, 127) + 127] +; CHECK: %polly.access.A6 = getelementptr float, float* %A, i64 0 + +; B[p / 127] +; CHECK: %pexp.div = sdiv exact i64 %p, 127 +; CHECK: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.div + ; A[i % 127] ; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 127 -; CHECK: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r +; CHECK: %polly.access.A8 = getelementptr float, float* %A, i64 %pexp.pdiv_r -; A[floor(i / 127)] +; B[floor(i / 127)] ; ; Note: without the floor, we would create a map i -> i/127, which only contains ; values of i that are divisible by 127. All other values of i would not @@ -25,47 +33,23 @@ ; each value of i to indeed be mapped to a value. ; ; CHECK: %pexp.p_div_q = udiv i64 %polly.indvar, 127 -; CHECK: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.p_div_q +; CHECK: %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q ; #define floord(n,d) ((n < 0) ? 
(n - d + 1) : n) / d -; A[p + 127 * floord(-p - 1, 127) + 127] -; CHECK: %20 = sub nsw i64 0, %p -; CHECK: %21 = sub nsw i64 %20, 1 -; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 127 -; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1 -; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %21, 0 -; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21 -; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127 -; CHECK: %22 = mul nsw i64 127, %pexp.fdiv_q.4 -; CHECK: %23 = add nsw i64 %p, %22 -; CHECK: %24 = add nsw i64 %23, 127 -; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24 +; A[p + 128 * floord(-p - 1, 128) + 128] +; POW2: %polly.access.A6 = getelementptr float, float* %A, i64 0 -; A[p / 127] -; CHECK: %pexp.div = sdiv exact i64 %p, 127 -; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div +; B[p / 128] +; POW2: %pexp.div = sdiv exact i64 %p, 128 +; POW2: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.div ; A[i % 128] ; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128 -; POW2: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r +; POW2: %polly.access.A8 = getelementptr float, float* %A, i64 %pexp.pdiv_r -; A[floor(i / 128)] +; B[floor(i / 128)] ; POW2: %pexp.p_div_q = udiv i64 %polly.indvar, 128 -; POW2: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.p_div_q - -; #define floord(n,d) ((n < 0) ? 
(n - d + 1) : n) / d -; A[p + 128 * floord(-p - 1, 128) + 128] -; POW2: %20 = sub nsw i64 0, %p -; POW2: %21 = sub nsw i64 %20, 1 -; POW2: %polly.fdiv_q.shr = ashr i64 %21, 7 -; POW2: %22 = mul nsw i64 128, %polly.fdiv_q.shr -; POW2: %23 = add nsw i64 %p, %22 -; POW2: %24 = add nsw i64 %23, 128 -; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %24 - -; A[p / 128] -; POW2: %pexp.div = sdiv exact i64 %p, 128 -; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div +; POW2: %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/invariant_load.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/invariant_load.ll @@ -0,0 +1,39 @@ +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; +; CHECK-LABEL: polly.start: +; CHECK-NEXT: %polly.access.B3 = getelementptr i32, i32* %B, i64 0 +; CHECK-NEXT: %polly.access.B3.load = load i32, i32* %polly.access.B3 +; +; CHECK-LABEL: polly.stmt.bb2: +; CHECK-NEXT: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar +; CHECK-NEXT: store i32 %polly.access.B3.load, i32* %scevgep, align 4 +; +; void f(int *restrict A, int *restrict B) { +; for (int i = 0; i < 1024; i++) +; A[i] = *B; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %A, i32* noalias %B) { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + %tmp = load i32, i32* %B, align 4 + %tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp, i32* %tmp3, align 4 + br label %bb4 + +bb4: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} Index: 
test/Isl/CodeGen/invariant_load_base_pointer.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/invariant_load_base_pointer.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-no-early-exit -polly-codegen -polly-ignore-aliasing -polly-detect-unprofitable -S < %s | FileCheck %s +; +; CHECK-LABEL: polly.start: +; CHECK-NEXT: %polly.access.BPLoc = getelementptr i32*, i32** %BPLoc, i64 0 +; CHECK-NEXT: %polly.access.BPLoc.load = load i32*, i32** %polly.access.BPLoc +; +; CHECK-LABEL: polly.stmt.bb2: +; CHECK-NEXT: %p_tmp3 = getelementptr inbounds i32, i32* %polly.access.BPLoc.load, i64 %polly.indvar +; +; void f(int **BPLoc) { +; for (int i = 0; i < 1024; i++) +; (*BPLoc)[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32** %BPLoc) { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + %tmp = load i32*, i32** %BPLoc, align 8 + %tmp3 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv + store i32 0, i32* %tmp3, align 4 + br label %bb4 + +bb4: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} Index: test/Isl/CodeGen/invariant_load_base_pointer_conditional.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/invariant_load_base_pointer_conditional.ll @@ -0,0 +1,61 @@ +; RUN: opt %loadPolly -polly-no-early-exit -polly-codegen -polly-ignore-aliasing -polly-detect-unprofitable -S < %s | FileCheck %s +; +; CHECK-LABEL: polly.start: +; CHECK-NEXT: %0 = sext i32 %N to i64 +; CHECK-NEXT: %1 = icmp sge i64 %0, 514 +; CHECK-NEXT: br label %polly.inv.preload.cond +; +; CHECK-LABEL: polly.inv.preload.cond: +; CHECK-NEXT: br i1 %1, label %polly.inv.preload.exec, label %polly.inv.preload.merge 
+; +; CHECK-LABEL: polly.inv.preload.merge: +; CHECK-NEXT: %polly.inv.preload.tmp6.merge = phi i32* [ %polly.access.BPLoc.load, %polly.inv.preload.exec ], [ undef, %polly.inv.preload.cond ] +; +; CHECK-LABEL: polly.stmt.bb5: +; CHECK-NEXT: %p_tmp7 = getelementptr inbounds i32, i32* %polly.inv.preload.tmp6.merge, i64 %polly.indvar6 +; +; void f(int **BPLoc, int *A, int N) { +; for (int i = 0; i < N; i++) +; if (i > 512) +; (*BPLoc)[i] = 0; +; else +; A[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32** %BPLoc, i32* %A, i32 %N) { +bb: + %tmp = sext i32 %N to i64 + br label %bb1 + +bb1: ; preds = %bb11, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ] + %tmp2 = icmp slt i64 %indvars.iv, %tmp + br i1 %tmp2, label %bb3, label %bb12 + +bb3: ; preds = %bb1 + %tmp4 = icmp sgt i64 %indvars.iv, 512 + br i1 %tmp4, label %bb5, label %bb8 + +bb5: ; preds = %bb3 + %tmp6 = load i32*, i32** %BPLoc, align 8 + %tmp7 = getelementptr inbounds i32, i32* %tmp6, i64 %indvars.iv + store i32 0, i32* %tmp7, align 4 + br label %bb10 + +bb8: ; preds = %bb3 + %tmp9 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp9, align 4 + br label %bb10 + +bb10: ; preds = %bb8, %bb5 + br label %bb11 + +bb11: ; preds = %bb10 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb12: ; preds = %bb1 + ret void +} Index: test/Isl/CodeGen/invariant_load_condition.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/invariant_load_condition.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; +; CHECK-LABEL: polly.start: +; CHECK-NEXT: %polly.access.C3 = getelementptr i32, i32* %C, i64 0 +; CHECK-NEXT: %polly.access.C3.load = load i32, i32* %polly.access.C3 +; +; CHECK-LABEL: polly.stmt.bb2: +; CHECK-NEXT: icmp eq i32 %polly.access.C3.load, 0 + +; void f(int *A, int *C) { +; for (int i = 0; i < 1024; i++) +; if 
(*C) +; A[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %C) { +bb: + br label %bb1 + +bb1: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb8 + +bb2: ; preds = %bb1 + %tmp = load i32, i32* %C, align 4 + %tmp3 = icmp eq i32 %tmp, 0 + br i1 %tmp3, label %bb6, label %bb4 + +bb4: ; preds = %bb2 + %tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp5, align 4 + br label %bb6 + +bb6: ; preds = %bb2, %bb4 + br label %bb7 + +bb7: ; preds = %bb6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb8: ; preds = %bb1 + ret void +} Index: test/Isl/CodeGen/invariant_load_loop_ub.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/invariant_load_loop_ub.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-codegen -polly-detect-unprofitable -S < %s | FileCheck %s +; +; TODO: This is a negative test as we cannot handle loops with a +; trip count ScalarEvolution cannot handle. However, once +; this changes we should be able to detect this. 
+; +; CHECK-NOT: polly.start +; +; void f(int *A, int *UB) { +; for (int i = 0; i < *UB; i++) +; A[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %UB) { +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb6 ], [ 0, %bb ] + %tmp = load i32, i32* %UB, align 4 + %tmp2 = sext i32 %tmp to i64 + %tmp3 = icmp slt i64 %indvars.iv, %tmp2 + br i1 %tmp3, label %bb4, label %bb7 + +bb4: ; preds = %bb1 + %tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp5, align 4 + br label %bb6 + +bb6: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb7: ; preds = %bb1 + ret void +} Index: test/Isl/CodeGen/invariant_load_ptr_ptr_noalias.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/invariant_load_ptr_ptr_noalias.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-codegen -polly-ignore-aliasing -S -polly-no-early-exit < %s | FileCheck %s +; +; CHECK: polly.start: +; CHECK: %polly.access.A = getelementptr i32**, i32*** %A, i64 42 +; CHECK: %polly.access.A.load = load i32**, i32*** %polly.access.A, !alias.scope !0, !noalias !2 +; CHECK: %polly.access.polly.access.A.load = getelementptr i32*, i32** %polly.access.A.load, i64 32 +; CHECK: %polly.access.polly.access.A.load.load = load i32*, i32** %polly.access.polly.access.A.load +; +; CHECK: polly.stmt.bb2: +; CHECK: %p_tmp6 = getelementptr inbounds i32, i32* %polly.access.polly.access.A.load.load, i64 %polly.indvar +; CHECK: store i32 0, i32* %p_tmp6, align 4 +; +; void f(int ***A) { +; for (int i = 0; i < 1024; i++) +; A[42][32][i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32*** %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label 
%bb2, label %bb8 + +bb2: ; preds = %bb1 + %tmp = getelementptr inbounds i32**, i32*** %A, i64 42 + %tmp3 = load i32**, i32*** %tmp, align 8 + %tmp4 = getelementptr inbounds i32*, i32** %tmp3, i64 32 + %tmp5 = load i32*, i32** %tmp4, align 8 + %tmp6 = getelementptr inbounds i32, i32* %tmp5, i64 %indvars.iv + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb8: ; preds = %bb1 + ret void +} Index: test/Isl/CodeGen/non-affine-phi-node-expansion.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion.ll @@ -3,6 +3,10 @@ %struct.wombat = type {[4 x i32]} +; CHECK: polly.start: +; CHECK-NEXT: %polly.access.B4 = getelementptr i32, i32* %B, i64 0 +; CHECK-NEXT: %polly.access.B4.load = load i32, i32* %polly.access.B4 + ; CHECK: polly.stmt.bb3.entry: ; preds = %polly.start ; CHECK: br label %polly.stmt.bb3 @@ -15,8 +19,7 @@ ; CHECK: br label %polly.stmt.bb13.exit ; CHECK: polly.stmt.bb5: ; preds = %polly.stmt.bb3 -; CHECK: %tmp7_p_scalar_ = load i32, i32* %B, !alias.scope !0, !noalias !2 -; CHECK: store i32 %tmp7_p_scalar_, i32* %polly.access.cast.arg1, !alias.scope !3, !noalias !4 +; CHECK: store i32 %polly.access.B4.load, i32* %polly.access.cast.arg1, !alias.scope !3, !noalias !4 ; CHECK: br label %polly.stmt.bb13.exit ; Function Attrs: nounwind uwtable Index: test/Isl/CodeGen/simple_vec_call.ll =================================================================== --- test/Isl/CodeGen/simple_vec_call.ll +++ test/Isl/CodeGen/simple_vec_call.ll @@ -24,19 +24,13 @@ ret void } -; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = extractelement <4 x float> 
%value_p_splat, i32 0 -; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1 -; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2 -; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3 -; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %0) [[NUW:#[0-9]+]] -; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %1) [[NUW]] -; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %2) [[NUW]] -; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %3) [[NUW]] -; CHECK: %4 = insertelement <4 x float> undef, float [[RES1]], i32 0 -; CHECK: %5 = insertelement <4 x float> %4, float [[RES2]], i32 1 -; CHECK: %6 = insertelement <4 x float> %5, float [[RES3]], i32 2 -; CHECK: %7 = insertelement <4 x float> %6, float [[RES4]], i32 3 -; CHECK: store <4 x float> %7 +; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW:#[0-9]+]] +; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] +; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] +; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] +; CHECK: %5 = insertelement <4 x float> undef, float [[RES1]], i32 0 +; CHECK: %6 = insertelement <4 x float> %5, float [[RES2]], i32 1 +; CHECK: %7 = insertelement <4 x float> %6, float [[RES3]], i32 2 +; CHECK: %8 = insertelement <4 x float> %7, float [[RES4]], i32 3 +; CHECK: store <4 x float> %8 ; CHECK: attributes [[NUW]] = { nounwind } Index: test/Isl/CodeGen/simple_vec_call_2.ll =================================================================== --- test/Isl/CodeGen/simple_vec_call_2.ll +++ test/Isl/CodeGen/simple_vec_call_2.ll @@ -24,19 +24,13 @@ ret void } -; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = extractelement 
<4 x float> %value_p_splat, i32 0 -; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1 -; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2 -; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3 -; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) [[NUW:#[0-9]+]] -; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) [[NUW]] -; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) [[NUW]] -; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) [[NUW]] -; CHECK: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0 -; CHECK: %5 = insertelement <4 x float**> %4, float** %p_result1, i32 1 -; CHECK: %6 = insertelement <4 x float**> %5, float** %p_result2, i32 2 -; CHECK: %7 = insertelement <4 x float**> %6, float** %p_result3, i32 3 -; CHECK: store <4 x float**> %7, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align +; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW:#[0-9]+]] +; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: %0 = insertelement <4 x float**> undef, float** %p_result, i32 0 +; CHECK: %1 = insertelement <4 x float**> %0, float** %p_result1, i32 1 +; CHECK: %2 = insertelement <4 x float**> %1, float** %p_result2, i32 2 +; CHECK: %3 = insertelement <4 x float**> %2, float** %p_result3, i32 3 +; CHECK: store <4 x float**> %3, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align ; CHECK: attributes [[NUW]] = { nounwind } Index: test/Isl/CodeGen/simple_vec_cast.ll =================================================================== --- test/Isl/CodeGen/simple_vec_cast.ll +++ test/Isl/CodeGen/simple_vec_cast.ll @@ -28,8 +28,8 @@ ret void } -; CHECK: %tmp_p_splat_one = load <1 x float>, <1 x float>* 
bitcast ([1024 x float]* @A to <1 x float>*), align 8, !alias.scope !0, !noalias !2 -; CHECK: %tmp_p_splat = shufflevector <1 x float> %tmp_p_splat_one, <1 x float> %tmp_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = fpext <4 x float> %tmp_p_splat to <4 x double> -; CHECK: store <4 x double> %0, <4 x double>* bitcast ([1024 x double]* @B to <4 x double>*), align 8, !alias.scope !3, !noalias !4 +; CHECK-NOT: load <1 x float> +; CHECK: %tmp_p_splat = shufflevector <1 x float> %0, <1 x float> %0, <4 x i32> zeroinitializer +; CHECK: %1 = fpext <4 x float> %tmp_p_splat to <4 x double> +; CHECK: store <4 x double> %1, <4 x double>* bitcast ([1024 x double]* @B to <4 x double>*), align 8, !alias.scope !3, !noalias !4 Index: test/Isl/CodeGen/simple_vec_const.ll =================================================================== --- test/Isl/CodeGen/simple_vec_const.ll +++ test/Isl/CodeGen/simple_vec_const.ll @@ -52,5 +52,5 @@ } -; CHECK: load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*) +; CHECK-NOT: load <1 x float> ; CHECK: shufflevector <1 x float> {{.*}}, <1 x float> {{.*}} <4 x i32> zeroinitializer Index: test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll =================================================================== --- test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll +++ test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll @@ -22,6 +22,6 @@ return: ret void } -; CHECK: %value_p_splat_one = load <1 x float**>, <1 x float**>* bitcast ([1024 x float**]* @A to <1 x float**>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer +; CHECK-NOT: load <1 x float**> +; CHECK: %value_p_splat = shufflevector <1 x float**> %0, <1 x float**> %0, <4 x i32> zeroinitializer ; CHECK: store <4 x float**> %value_p_splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8 Index: test/ScopDetectionDiagnostics/ReportVariantBasePtr-01.ll 
=================================================================== --- test/ScopDetectionDiagnostics/ReportVariantBasePtr-01.ll +++ test/ScopDetectionDiagnostics/ReportVariantBasePtr-01.ll @@ -6,7 +6,7 @@ ; ; void a(struct b *A) { ; for (int i=0; i<32; i++) -; A->b[i] = 0; +; A[i].b[i] = 0; ; } ; CHECK: remark: ReportVariantBasePtr01.c:6:8: The following errors keep this region from being a Scop. @@ -23,11 +23,11 @@ entry.split: ; preds = %entry tail call void @llvm.dbg.value(metadata %struct.b* %A, i64 0, metadata !16, metadata !DIExpression()), !dbg !23 tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !25 - %b = getelementptr inbounds %struct.b, %struct.b* %A, i64 0, i32 0, !dbg !26 br label %for.body, !dbg !27 for.body: ; preds = %for.body, %entry.split %indvar4 = phi i64 [ %indvar.next, %for.body ], [ 0, %entry.split ] + %b = getelementptr inbounds %struct.b, %struct.b* %A, i64 %indvar4, i32 0, !dbg !26 %0 = mul i64 %indvar4, 4, !dbg !26 %1 = add i64 %0, 3, !dbg !26 %2 = add i64 %0, 2, !dbg !26 Index: test/ScopInfo/invariant_load.ll =================================================================== --- /dev/null +++ test/ScopInfo/invariant_load.ll @@ -0,0 +1,34 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] [Invariant] +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_B[0] }; +; +; void f(int *restrict A, int *restrict B) { +; for (int i = 0; i < 1024; i++) +; A[i] = *B; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %A, i32* noalias %B) { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + %tmp = load i32, i32* %B, align 4 + %tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp, i32* %tmp3, 
align 4 + br label %bb4 + +bb4: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} Index: test/ScopInfo/invariant_load_base_pointer.ll =================================================================== --- /dev/null +++ test/ScopInfo/invariant_load_base_pointer.ll @@ -0,0 +1,34 @@ +; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-detect-unprofitable -analyze < %s | FileCheck %s +; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] [Invariant] +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_BPLoc[0] }; +; +; void f(int **BPLoc) { +; for (int i = 0; i < 1024; i++) +; (*BPLoc)[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32** %BPLoc) { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + %tmp = load i32*, i32** %BPLoc, align 8 + %tmp3 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv + store i32 0, i32* %tmp3, align 4 + br label %bb4 + +bb4: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} Index: test/ScopInfo/invariant_load_base_pointer_conditional.ll =================================================================== --- /dev/null +++ test/ScopInfo/invariant_load_base_pointer_conditional.ll @@ -0,0 +1,50 @@ +; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-detect-unprofitable -analyze < %s | FileCheck %s +; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] [Invariant] +; CHECK-NEXT: [N] -> { Stmt_bb5[i0] -> MemRef_BPLoc[0] }; +; +; void f(int **BPLoc, int *A, int N) { +; for (int i = 0; i < N; i++) +; if (i > 512) +; (*BPLoc)[i] = 0; +; else +; A[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32** %BPLoc, i32* %A, 
i32 %N) { +bb: + %tmp = sext i32 %N to i64 + br label %bb1 + +bb1: ; preds = %bb11, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ] + %tmp2 = icmp slt i64 %indvars.iv, %tmp + br i1 %tmp2, label %bb3, label %bb12 + +bb3: ; preds = %bb1 + %tmp4 = icmp sgt i64 %indvars.iv, 512 + br i1 %tmp4, label %bb5, label %bb8 + +bb5: ; preds = %bb3 + %tmp6 = load i32*, i32** %BPLoc, align 8 + %tmp7 = getelementptr inbounds i32, i32* %tmp6, i64 %indvars.iv + store i32 0, i32* %tmp7, align 4 + br label %bb10 + +bb8: ; preds = %bb3 + %tmp9 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp9, align 4 + br label %bb10 + +bb10: ; preds = %bb8, %bb5 + br label %bb11 + +bb11: ; preds = %bb10 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb12: ; preds = %bb1 + ret void +} Index: test/ScopInfo/invariant_load_condition.ll =================================================================== --- /dev/null +++ test/ScopInfo/invariant_load_condition.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] [Invariant] +; CHECK-NEXT: { Stmt_bb2__TO__bb6[i0] -> MemRef_C[0] }; +; +; void f(int *A, int *C) { +; for (int i = 0; i < 1024; i++) +; if (*C) +; A[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %C) { +bb: + br label %bb1 + +bb1: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb8 + +bb2: ; preds = %bb1 + %tmp = load i32, i32* %C, align 4 + %tmp3 = icmp eq i32 %tmp, 0 + br i1 %tmp3, label %bb6, label %bb4 + +bb4: ; preds = %bb2 + %tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp5, align 4 + br label %bb6 + +bb6: ; preds = %bb2, %bb4 + br label %bb7 + +bb7: ; preds = %bb6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 
1 + br label %bb1 + +bb8: ; preds = %bb1 + ret void +} Index: test/ScopInfo/invariant_load_loop_ub.ll =================================================================== --- /dev/null +++ test/ScopInfo/invariant_load_loop_ub.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly -polly-scops -polly-detect-unprofitable -analyze < %s | FileCheck %s +; +; TODO: This is a negative test as we cannot handle loops with a +; trip count ScalarEvolution cannot handle. However, once +; this changes we should be able to detect this. +; +; CHECK-NOT: Context +; +; __CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] [Invariant] +; __CHECK-NEXT: { Stmt_bb1[i0] -> MemRef_UB[0] }; +; +; void f(int *A, int *UB) { +; for (int i = 0; i < *UB; i++) +; A[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %UB) { +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb6 ], [ 0, %bb ] + %tmp = load i32, i32* %UB, align 4 + %tmp2 = sext i32 %tmp to i64 + %tmp3 = icmp slt i64 %indvars.iv, %tmp2 + br i1 %tmp3, label %bb4, label %bb7 + +bb4: ; preds = %bb1 + %tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp5, align 4 + br label %bb6 + +bb6: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb7: ; preds = %bb1 + ret void +} Index: test/ScopInfo/invariant_load_ptr_ptr_noalias.ll =================================================================== --- test/ScopInfo/invariant_load_ptr_ptr_noalias.ll +++ test/ScopInfo/invariant_load_ptr_ptr_noalias.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -tbaa -polly-scops -polly-ignore-aliasing -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -analyze < %s | FileCheck %s ; ; CHECK: Arrays { ; CHECK: i32** MemRef_A[*][8] @@ -12,6 +12,11 @@ ; CHECK: i32 MemRef_tmp5[*][ { [] -> [(4)] } ] [BasePtrOrigin: MemRef_tmp3] ; CHECK: } ; +; CHECK: ReadAccess := [Reduction 
Type: NONE] [Scalar: 0] [Invariant] +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_A[42] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] [Invariant] +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_tmp3[32] }; +; ; void f(int ***A) { ; for (int i = 0; i < 1024; i++) ; A[42][32][i] = 0; @@ -30,11 +35,11 @@ bb2: ; preds = %bb1 %tmp = getelementptr inbounds i32**, i32*** %A, i64 42 - %tmp3 = load i32**, i32*** %tmp, align 8, !tbaa !1 + %tmp3 = load i32**, i32*** %tmp, align 8 %tmp4 = getelementptr inbounds i32*, i32** %tmp3, i64 32 - %tmp5 = load i32*, i32** %tmp4, align 8, !tbaa !1 + %tmp5 = load i32*, i32** %tmp4, align 8 %tmp6 = getelementptr inbounds i32, i32* %tmp5, i64 %indvars.iv - store i32 0, i32* %tmp6, align 4, !tbaa !5 + store i32 0, i32* %tmp6, align 4 br label %bb7 bb7: ; preds = %bb2 @@ -44,11 +49,3 @@ bb8: ; preds = %bb1 ret void } - -!0 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git 9e282ff441e7a367dc711e41fd19d27ffc0f78d6)"} -!1 = !{!2, !2, i64 0} -!2 = !{!"any pointer", !3, i64 0} -!3 = !{!"omnipotent char", !4, i64 0} -!4 = !{!"Simple C/C++ TBAA"} -!5 = !{!6, !6, i64 0} -!6 = !{!"int", !3, i64 0}