Index: include/llvm/Analysis/LoopAccessAnalysis.h
===================================================================
--- include/llvm/Analysis/LoopAccessAnalysis.h
+++ include/llvm/Analysis/LoopAccessAnalysis.h
@@ -432,6 +432,13 @@
   /// Only used in DEBUG build but we don't want NDEBUG-dependent ABI.
   unsigned NumSymbolicStrides;
 
+  /// \brief Checks for a store to a loop-invariant address inside the loop.
+  /// Returns true if the analysis recorded at least one such store, and
+  /// false otherwise.
+  bool hasStoreToLoopInvariantAddress() const {
+    return StoreToLoopInvariantAddress;
+  }
+
 private:
   /// \brief Analyze the loop. Substitute symbolic strides using Strides.
   void analyzeLoop(const ValueToValueMap &Strides);
@@ -469,6 +476,10 @@
   /// \brief Cache the result of analyzeLoop.
   bool CanVecMem;
 
+  /// \brief Indicator for stores to loop-invariant (uniform) addresses.
+  /// Set to true when the loop writes to a loop-invariant address.
+  bool StoreToLoopInvariantAddress;
+
   /// \brief The diagnostics report generated for the analysis. E.g. why we
   /// couldn't analyze the loop.
   Optional<LoopAccessReport> Report;
Index: lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- lib/Analysis/LoopAccessAnalysis.cpp
+++ lib/Analysis/LoopAccessAnalysis.cpp
@@ -1044,16 +1044,8 @@
   for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
     StoreInst *ST = cast<StoreInst>(*I);
     Value* Ptr = ST->getPointerOperand();
-
-    if (isUniform(Ptr)) {
-      emitAnalysis(
-          LoopAccessReport(ST)
-          << "write to a loop invariant address could not be vectorized");
-      DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n");
-      CanVecMem = false;
-      return;
-    }
-
+    // Record whether any store writes to a loop-invariant address.
+    StoreToLoopInvariantAddress |= isUniform(Ptr);
     // If we did *not* see this pointer before, insert it to the read-write
     // list. At this phase it is only a 'write' list.
     if (Seen.insert(Ptr).second) {
@@ -1314,7 +1306,8 @@
                                const ValueToValueMap &Strides)
     : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
       TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0),
-      MaxSafeDepDistBytes(-1U), CanVecMem(false) {
+      MaxSafeDepDistBytes(-1U), CanVecMem(false),
+      StoreToLoopInvariantAddress(false) {
   if (canAnalyzeLoop())
     analyzeLoop(Strides);
 }
@@ -1327,6 +1320,10 @@
       OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
   }
 
+  OS.indent(Depth) << "Store to invariant address was "
+                   << (StoreToLoopInvariantAddress ? "" : "not ")
+                   << "found in loop.\n";
+
   if (Report)
     OS.indent(Depth) << "Report: " << Report->str() << "\n";
 
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4008,7 +4008,13 @@
     emitAnalysis(VectorizationReport(*OptionalReport));
   if (!LAI->canVectorizeMemory())
     return false;
-
+  if (LAI->hasStoreToLoopInvariantAddress()) {
+    emitAnalysis(
+        VectorizationReport()
+        << "write to a loop invariant address could not be vectorized");
+    DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+    return false;
+  }
   if (LAI->getNumRuntimePointerChecks() >
       VectorizerParams::RuntimeMemoryCheckThreshold) {
     emitAnalysis(VectorizationReport()
Index: test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
===================================================================
--- test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
+++ test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will find store to invariant address.
+; Inner loop has a store to invariant address.
+;
+;  for(; i < itr; i++) {
+;    for(; j < itr; j++) {
+;      var1[i] = var2[j] + var1[i];
+;    }
+;  }
+
+; CHECK: Store to invariant address was found in loop.
+; CHECK-NOT: Store to invariant address was not found in loop.
+
+define i32 @foo(i32* nocapture %var1, i32* nocapture readonly %var2, i32 %itr) #0 {
+entry:
+  %cmp20 = icmp eq i32 %itr, 0
+  br i1 %cmp20, label %for.end10, label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.inc8
+  %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
+  %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
+  %cmp218 = icmp ult i32 %j.022, %itr
+  br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
+  %0 = zext i32 %j.022 to i64
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx, align 4
+  %2 = load i32, i32* %arrayidx5, align 4
+  %add = add nsw i32 %2, %1
+  store i32 %add, i32* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %itr
+  br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8:                                         ; preds = %for.body3, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+  %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+  br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
+
+for.end10:                                        ; preds = %for.inc8, %entry
+  ret i32 undef
+}
+
Index: test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
===================================================================
--- test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
+++ test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will not find store to invariant address.
+; Inner loop has no store to invariant address.
+;
+;  for(; i < itr; i++) {
+;    for(; j < itr; j++) {
+;      var2[j] = var2[j] + var1[i];
+;    }
+;  }
+
+; CHECK: Store to invariant address was not found in loop.
+; CHECK-NOT: Store to invariant address was found in loop.
+
+
+define i32 @foo(i32* nocapture readonly %var1, i32* nocapture %var2, i32 %itr) #0 {
+entry:
+  %cmp20 = icmp eq i32 %itr, 0
+  br i1 %cmp20, label %for.end10, label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.inc8
+  %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
+  %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
+  %cmp218 = icmp ult i32 %j.022, %itr
+  br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
+  %0 = zext i32 %j.022 to i64
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx, align 4
+  %2 = load i32, i32* %arrayidx5, align 4
+  %add = add nsw i32 %2, %1
+  store i32 %add, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %itr
+  br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8:                                         ; preds = %for.body3, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+  %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+  br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
+
+for.end10:                                        ; preds = %for.inc8, %entry
+  ret i32 undef
+}
+
Index: test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
===================================================================
--- test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
+++ test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will find store to invariant address.
+; Inner loop has a store to invariant address.
+;
+;  for(; i < itr; i++) {
+;    for(; j < itr; j++) {
+;      var1[j] = ++var2[i] + var1[j];
+;    }
+;  }
+
+; CHECK: Store to invariant address was found in loop.
+
+define void @foo(i32* nocapture %var1, i32* nocapture %var2, i32 %itr) #0 {
+entry:
+  %cmp20 = icmp sgt i32 %itr, 0
+  br i1 %cmp20, label %for.cond1.preheader, label %for.end11
+
+for.cond1.preheader:                              ; preds = %entry, %for.inc9
+  %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc9 ], [ 0, %entry ]
+  %j.022 = phi i32 [ %j.1.lcssa, %for.inc9 ], [ 0, %entry ]
+  %cmp218 = icmp slt i32 %j.022, %itr
+  br i1 %cmp218, label %for.body3.lr.ph, label %for.inc9
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv23
+  %0 = sext i32 %j.022 to i64
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+  %1 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv
+  %2 = load i32, i32* %arrayidx5, align 4
+  %add = add nsw i32 %inc, %2
+  store i32 %add, i32* %arrayidx5, align 4
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %itr
+  br i1 %exitcond, label %for.inc9, label %for.body3
+
+for.inc9:                                         ; preds = %for.body3, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+  %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+  br i1 %exitcond26, label %for.end11, label %for.cond1.preheader
+
+for.end11:                                        ; preds = %for.inc9, %entry
+  ret void
+}