diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -932,8 +932,10 @@
   // guaranteed to be stored at the end of the loop. Also, if decision to
   // vectorize loop is made, runtime checks are added so as to make sure that
   // invariant address won't alias with any other objects.
-  if (!LAI->getStoresToInvariantAddresses().empty()) {
-    // For each invariant address, check its last stored value is unconditional.
+  if (!LAI->getStoresToInvariantAddresses().empty() &&
+      !getReductionVars().empty()) {
+    // For each invariant address, check its last stored value is unconditional
+    // and the address is not calculated inside the loop.
     for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
       if (isInvariantStoreOfReduction(SI) &&
           blockNeedsPredication(SI->getParent())) {
@@ -944,6 +946,19 @@
             "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
         return false;
       }
+
+      // The invariant address should be defined outside of the loop. LICM
+      // usually ensures this; in the rare cases where it does not, bail out
+      // rather than overcomplicate vectorization to support them.
+      auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand());
+      if (Ptr && TheLoop->contains(Ptr)) {
+        reportVectorizationFailure(
+            "Invariant address is calculated inside the loop",
+            "write to a loop invariant address could not "
+            "be vectorized",
+            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+        return false;
+      }
     }
 
     if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
--- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -513,3 +513,25 @@
 exit:
   ret void
 }
+
+define void @reduc_store_invariant_calucalated_inside_loop(i32* %dst, i32* readonly %src) {
+; CHECK-LABEL: @reduc_store_invariant_calucalated_inside_loop
+; CHECK-NOT: vector.body:
+entry:
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %add = add nsw i32 %sum, %0
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  store i32 %add, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}