Index: llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h +++ llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h @@ -97,6 +97,17 @@ /// Set of potential dependent memory accesses. typedef EquivalenceClasses DepCandidates; + /// Type to keep track of the status of the dependence check. The order of + /// the elements is important and has to be from most permissive to least + /// permissive. + enum class VectorizationSafetyStatus { + // Can vectorize safely without RT checks. All dependences are known to be + // safe. + Safe, + // Cannot vectorize due to unsafe or unknown dependencies. + Unsafe, + }; + /// Dependece between memory access instructions. struct Dependence { /// The type of the dependence. @@ -146,7 +157,7 @@ Instruction *getDestination(const LoopAccessInfo &LAI) const; /// Dependence types that don't prevent vectorization. - static bool isSafeForVectorization(DepType Type); + static VectorizationSafetyStatus isSafeForVectorization(DepType Type); /// Lexically forward dependence. bool isForward() const; @@ -164,8 +175,8 @@ MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U), - ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true), - RecordDependences(true) {} + ShouldRetryWithRuntimeCheck(false), + Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {} /// Register the location (instructions are given increasing numbers) /// of a write access. @@ -193,7 +204,9 @@ /// No memory dependence was encountered that would inhibit /// vectorization. - bool isSafeForVectorization() const { return SafeForVectorization; } + bool isSafeForVectorization() const { + return Status == VectorizationSafetyStatus::Safe; + } /// The maximum number of bytes of a vector register we can vectorize /// the accesses safely with. 
@@ -269,9 +282,9 @@ /// vectorize this loop with runtime checks. bool ShouldRetryWithRuntimeCheck; - /// No memory dependence was encountered that would inhibit - /// vectorization. - bool SafeForVectorization; + /// Result of the dependence checks, indicating whether the checked + /// dependences are safe for vectorization or not. + VectorizationSafetyStatus Status; //// True if Dependences reflects the dependences in the //// loop. If false we exceeded MaxDependences and @@ -304,6 +317,10 @@ /// \return false if we shouldn't vectorize at all or avoid larger /// vectorization factors by limiting MaxSafeDepDistBytes. bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize); + + /// Updates the current safety status with \p S. We can go from Safe to + /// Unsafe. + void mergeInStatus(VectorizationSafetyStatus S); }; /// Holds information about the memory runtime legality checks to verify Index: llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp +++ llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp @@ -1221,18 +1221,19 @@ return X == PtrSCEVB; } -bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { +MemoryDepChecker::VectorizationSafetyStatus +MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { switch (Type) { case NoDep: case Forward: case BackwardVectorizable: - return true; + return VectorizationSafetyStatus::Safe; case Unknown: case ForwardButPreventsForwarding: case Backward: case BackwardVectorizableButPreventsForwarding: - return false; + return VectorizationSafetyStatus::Unsafe; } llvm_unreachable("unexpected DepType!"); } @@ -1317,6 +1318,11 @@ return false; } +void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) { + if (Status < S) + Status = S; +} + /// Given a non-constant (unknown) dependence-distance \p Dist between two /// memory accesses, that have the same stride whose 
absolute value is given /// in \p Stride, and that have the same type size \p TypeByteSize, @@ -1652,7 +1658,7 @@ Dependence::DepType Type = isDependent(*A.first, A.second, *B.first, B.second, Strides); - SafeForVectorization &= Dependence::isSafeForVectorization(Type); + mergeInStatus(Dependence::isSafeForVectorization(Type)); // Gather dependences unless we accumulated MaxDependences // dependences. In that case return as soon as we find the first @@ -1669,7 +1675,7 @@ << "Too many dependences, stopped recording\n"); } } - if (!RecordDependences && !SafeForVectorization) + if (!RecordDependences && !isSafeForVectorization()) return false; } ++OI; @@ -1679,7 +1685,7 @@ } LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); - return SafeForVectorization; + return isSafeForVectorization(); } SmallVector Index: llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll +++ llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll @@ -117,6 +117,46 @@ ret void } +; Check we do generate unnecessary runtime checks. They will always fail. 
+ +; void test_runtime_check2(float *a, float b, unsigned offset, unsigned offset2, unsigned n, float *c) { +; for (unsigned i = 1; i < n; i++) { +; a[i+offset] += a[i+offset2] + b; +; c[i] = c[i-1] + b; +; } +; } +; +; CHECK-LABEL: test_runtime_check2 +; CHECK: <4 x float> +define void @test_runtime_check2(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n, float* %c) { +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %ind.sum = add i64 %iv, %offset + %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum + %l1 = load float, float* %arr.idx, align 4 + %ind.sum2 = add i64 %iv, %offset2 + %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2 + %l2 = load float, float* %arr.idx2, align 4 + %m = fmul fast float %b, %l2 + %ad = fadd fast float %l1, %m + store float %ad, float* %arr.idx, align 4 + %c.ind = add i64 %iv, -1 + %c.idx = getelementptr inbounds float, float* %c, i64 %c.ind + %lc = load float, float* %c.idx, align 4 + %vc = fadd float %lc, 1.0 + %c.idx2 = getelementptr inbounds float, float* %c, i64 %iv + store float %vc, float* %c.idx2 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %loopexit, label %for.body + +loopexit: + ret void +} + ; CHECK: !9 = !DILocation(line: 101, column: 1, scope: !{{.*}}) !llvm.module.flags = !{!0, !1}