Index: include/llvm/Analysis/LoopAccessAnalysis.h
===================================================================
--- include/llvm/Analysis/LoopAccessAnalysis.h
+++ include/llvm/Analysis/LoopAccessAnalysis.h
@@ -97,6 +97,16 @@
   /// Set of potential dependent memory accesses.
   typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
 
+  /// Status of the dependence check, ordered from most to least permissive.
+  enum class StatusTy {
+    // Can vectorize safely without RT checks.
+    Safe,
+    // Can vectorize with RT checks.
+    SafeWithRtChecks,
+    // Cannot vectorize, because we found known unsafe dependences.
+    Unsafe,
+  };
+
   /// Dependece between memory access instructions.
   struct Dependence {
     /// The type of the dependence.
@@ -146,7 +156,7 @@
     Instruction *getDestination(const LoopAccessInfo &LAI) const;
 
     /// Dependence types that don't prevent vectorization.
-    static bool isSafeForVectorization(DepType Type);
+    static StatusTy isSafeForVectorization(DepType Type);
 
     /// Lexically forward dependence.
     bool isForward() const;
@@ -164,7 +174,7 @@
 
   MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
       : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U),
-        ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true),
+        ShouldRetryWithRuntimeCheck(false), Status(StatusTy::Safe),
         RecordDependences(true) {}
 
   /// Register the location (instructions are given increasing numbers)
@@ -193,7 +203,7 @@
 
   /// No memory dependence was encountered that would inhibit
   /// vectorization.
-  bool isSafeForVectorization() const { return SafeForVectorization; }
+  bool isSafeForVectorization() const { return Status == StatusTy::Safe; }
 
   /// The maximum number of bytes of a vector register we can vectorize
   /// the accesses safely with.
@@ -205,7 +215,9 @@
 
   /// In same cases when the dependency check fails we can still
   /// vectorize the loop with a dynamic array access check.
-  bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
+  bool shouldRetryWithRuntimeCheck() const {
+    return ShouldRetryWithRuntimeCheck && Status == StatusTy::SafeWithRtChecks;
+  }
 
   /// Returns the memory dependences. If null is returned we exceeded
   /// the MaxDependences threshold and this information is not
@@ -269,9 +281,10 @@
   /// vectorize this loop with runtime checks.
   bool ShouldRetryWithRuntimeCheck;
 
-  /// No memory dependence was encountered that would inhibit
-  /// vectorization.
-  bool SafeForVectorization;
+  /// Result of the dependence checks, indicating whether the checked
+  /// dependences are safe for vectorization, require RT checks or are known to
+  /// be unsafe.
+  StatusTy Status;
 
   //// True if Dependences reflects the dependences in the
   //// loop. If false we exceeded MaxDependences and
@@ -304,6 +317,10 @@
   /// \return false if we shouldn't vectorize at all or avoid larger
   /// vectorization factors by limiting MaxSafeDepDistBytes.
   bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
+
+  /// Updates the current safety status with \p S. We can go from Safe to
+  /// either SafeWithRtChecks or Unsafe and from SafeWithRtChecks to Unsafe.
+  void mergeInStatus(StatusTy S);
 };
 
 /// Holds information about the memory runtime legality checks to verify
Index: lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- lib/Analysis/LoopAccessAnalysis.cpp
+++ lib/Analysis/LoopAccessAnalysis.cpp
@@ -1221,18 +1221,20 @@
   return X == PtrSCEVB;
 }
 
-bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+MemoryDepChecker::StatusTy
+MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
   switch (Type) {
   case NoDep:
   case Forward:
   case BackwardVectorizable:
-    return true;
+    return StatusTy::Safe;
 
   case Unknown:
+    return StatusTy::SafeWithRtChecks;
   case ForwardButPreventsForwarding:
   case Backward:
   case BackwardVectorizableButPreventsForwarding:
-    return false;
+    return StatusTy::Unsafe;
   }
   llvm_unreachable("unexpected DepType!");
 }
@@ -1317,6 +1319,13 @@
   return false;
 }
 
+void MemoryDepChecker::mergeInStatus(StatusTy S) {
+  // StatusTy is ordered from most to least permissive, so merging keeps the
+  // less permissive (i.e. larger) of the current and the incoming status.
+  if (Status < S)
+    Status = S;
+}
+
 /// Given a non-constant (unknown) dependence-distance \p Dist between two
 /// memory accesses, that have the same stride whose absolute value is given
 /// in \p Stride, and that have the same type size \p TypeByteSize,
@@ -1652,7 +1661,7 @@
 
             Dependence::DepType Type =
                 isDependent(*A.first, A.second, *B.first, B.second, Strides);
-            SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+            mergeInStatus(Dependence::isSafeForVectorization(Type));
 
             // Gather dependences unless we accumulated MaxDependences
             // dependences. In that case return as soon as we find the first
@@ -1669,7 +1678,7 @@
                            << "Too many dependences, stopped recording\n");
               }
             }
-            if (!RecordDependences && !SafeForVectorization)
+            if (!RecordDependences && Status == StatusTy::Unsafe)
               return false;
           }
         ++OI;
@@ -1679,7 +1688,7 @@
   }
 
   LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
-  return SafeForVectorization;
+  return Status == StatusTy::Safe;
 }
 
 SmallVector<Instruction *, 4>
Index: test/Transforms/LoopVectorize/runtime-check.ll
===================================================================
--- test/Transforms/LoopVectorize/runtime-check.ll
+++ test/Transforms/LoopVectorize/runtime-check.ll
@@ -117,6 +117,48 @@
   ret void
 }
 
+; Check we do not generate runtime checks if we found a known dependence preventing
+; vectorization. In this case, it is a read of c[i-1] followed by a write of c[i].
+; The runtime checks would always fail.
+
+; void test_runtime_check2(float *a, float b, unsigned offset, unsigned offset2, unsigned n, float *c) {
+;   for (unsigned i = 1; i < n; i++) {
+;     a[i+offset] += a[i+offset2] * b;
+;     c[i] = c[i-1] + 1.0f;
+;   }
+; }
+;
+; CHECK-LABEL: test_runtime_check2
+; CHECK-NOT: <4 x float>
+define void @test_runtime_check2(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n, float* %c) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %ind.sum = add i64 %iv, %offset
+  %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
+  %l1 = load float, float* %arr.idx, align 4
+  %ind.sum2 = add i64 %iv, %offset2
+  %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
+  %l2 = load float, float* %arr.idx2, align 4
+  %m = fmul fast float %b, %l2
+  %ad = fadd fast float %l1, %m
+  store float %ad, float* %arr.idx, align 4
+  %c.ind = add i64 %iv, -1
+  %c.idx = getelementptr inbounds float, float* %c, i64 %c.ind
+  %lc = load float, float* %c.idx, align 4
+  %vc = fadd float %lc, 1.0
+  %c.idx2 = getelementptr inbounds float, float* %c, i64 %iv
+  store float %vc, float* %c.idx2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+  ret void
+}
+
 ; CHECK: !9 = !DILocation(line: 101, column: 1, scope: !{{.*}})
 
 !llvm.module.flags = !{!0, !1}