diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -949,6 +949,12 @@ DepIds.push_back(DepId); } + if (isa(PSE.getBackedgeTakenCount())) { + LLVM_DEBUG(dbgs() << "LAA: Cannot add runtime check for loop with unknown " + "number of iterations\n"); + return false; + } + for (auto &P : TranslatedPtrs) { const SCEV *PtrExpr = P.first; if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume)) @@ -993,13 +999,15 @@ Value *&UncomputablePtr, bool ShouldCheckWrap) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool CanDoRT = true; + // + // A computable backedge taken count is required to generate runtime-checks. + bool CanDoRT = !isa(PSE.getBackedgeTakenCount()); bool MayNeedRTCheck = false; - if (!IsRTCheckAnalysisNeeded) return true; + if (!IsRTCheckAnalysisNeeded) + return true; bool IsDepCheckNeeded = isDependencyCheckNeeded(); - // We assign a consecutive id to access from different alias sets. // Accesses between different groups doesn't need to be checked. unsigned ASId = 0; @@ -1707,6 +1715,9 @@ // will be executed only if LoopCount >= VF, proving distance >= LoopCount // also guarantees that distance >= VF. // + if (isa(BackedgeTakenCount)) + return false; + const uint64_t ByteStride = Stride * TypeByteSize; const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride); const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step); @@ -2095,15 +2106,6 @@ return false; } - // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE->getBackedgeTakenCount(); - if (isa(ExitCount)) { - recordAnalysis("CantComputeNumberOfIterations") - << "could not determine number of loop iterations"; - LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); - return false; - } - return true; } @@ -2516,6 +2518,14 @@ if (!Stride) return; + const SCEV *BETakenCount = PSE->getBackedgeTakenCount(); + + if (isa(BETakenCount)) { + LLVM_DEBUG(dbgs() << "LAA: Cannot analyze strided access, because a known " + "backedge-taken-count is required\n"); + return; + } + LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for " "versioning:"); LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); @@ -2534,7 +2544,6 @@ // of using gather/scatters (if available). const SCEV *StrideExpr = PSE->getSCEV(Stride); - const SCEV *BETakenCount = PSE->getBackedgeTakenCount(); // Match the types so we can compare the stride and the BETakenCount. // The Stride can be positive/negative, so we sign extend Stride; diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -799,6 +799,13 @@ "may not insert runtime check with convergent operation"); } + if (!Pred.isAlwaysTrue() && + isa(SE->getBackedgeTakenCount(L))) { + return fail( + "RuntimeCheckUnknownBackedgeTakenCount", + "Cannot create runtime checks with unknown backedge-taken count"); + } + // To keep things simple have an empty preheader before we version or clone // the loop. (Also split if this has no predecessor, i.e. entry, because we // rely on PH having a predecessor.) diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -558,6 +558,14 @@ return false; } + if (!LAI.getPSE().getPredicate().isAlwaysTrue() && + isa(PSE.getBackedgeTakenCount())) { + LLVM_DEBUG( + dbgs() + << "Cannot version loop with uncomputable backedge-taken count\n"); + return false; + } + // Point of no-return, start the transformation. First, version the loop // if necessary. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1331,6 +1331,16 @@ return false; } + if (isa(PSE.getBackedgeTakenCount())) { + reportVectorizationFailure("Uncomputable number of iterations", + "could not determine number of loop iterations", + "CantComputeNumberOfIterations", ORE, TheLoop); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" << (LAI->getRuntimePointerChecking()->Need ? " (with a runtime bound check)" diff --git a/llvm/test/Analysis/LoopAccessAnalysis/uncomputable-backedge-taken-count.ll b/llvm/test/Analysis/LoopAccessAnalysis/uncomputable-backedge-taken-count.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/uncomputable-backedge-taken-count.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/uncomputable-backedge-taken-count.ll @@ -3,9 +3,9 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" -; TODO: Loop iteration counts are only required if we generate memory -; runtime checks. Missing iteration counts should not prevent -; analysis, if no runtime checks are required. +; Loop iteration counts are only required if we generate memory +; runtime checks. Missing iteration counts should not prevent +; analysis, if no runtime checks are required. ; No memory checks are required, because base pointers do not alias and we have ; a forward dependence for %a. @@ -13,7 +13,14 @@ i16* noalias %b) { ; CHECK-LABEL: safe_forward_dependence ; CHECK: for.body: -; CHECK-NEXT: Report: could not determine number of loop iterations +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Forward: +; CHECK-NEXT: %loadA_plus_2 = load i16, i16* %arrayidxA_plus_2, align 2 -> +; CHECK-NEXT: store i16 %mul, i16* %arrayidxA, align 2 +; +; CHECK: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: ; entry: br label %for.body @@ -49,7 +56,15 @@ i16* noalias %b) { ; CHECK-LABEL: unsafe_backwards_dependence ; CHECK: for.body: -; CHECK-NEXT: Report: could not determine number of loop iterations +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Backward loop carried data dependence. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %loadA_plus_2 = load i16, i16* %arrayidxA_plus_2, align 2 -> +; CHECK-NEXT: store i16 %mul, i16* %arrayidxA, align 2 + +; CHECK: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: ; entry: br label %for.body @@ -83,7 +98,7 @@ define void @ptr_may_alias(i16* %a, i16* %b) { ; CHECK-LABEL: ptr_may_alias ; CHECK: for.body: -; CHECK-NEXT: Report: could not determine number of loop iterations +; CHECK-NEXT: Report: cannot identify array bounds ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopDistribute/uncomputable-backedge-taken-count.ll b/llvm/test/Transforms/LoopDistribute/uncomputable-backedge-taken-count.ll --- a/llvm/test/Transforms/LoopDistribute/uncomputable-backedge-taken-count.ll +++ b/llvm/test/Transforms/LoopDistribute/uncomputable-backedge-taken-count.ll @@ -8,7 +8,6 @@ ; Making the exit condition depend on a load would break current loop-distribute, ; because it requires all accesses to end up in either of the loops, but not both. -; TODO ; Can distribute with unknown backedge-taken count, because no runtime checks are ; required. define void @unknown_btc_distribute_no_checks_needed(i32* noalias %a, @@ -16,6 +15,12 @@ i32* noalias %d) { ; CHECK-LABEL: @unknown_btc_distribute_no_checks_needed( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label %entry.split.ldist1 +; +; CHECK-LABEL: entry.split.ldist1: +; CHECK: br i1 false, label %entry.split, label %for.body.ldist1 +; +; CHECK-LABEL: entry.split: ; CHECK-NEXT: br label %for.body ; entry: diff --git a/llvm/test/Transforms/LoopLoadElim/uncomputable-backedge-taken-count.ll b/llvm/test/Transforms/LoopLoadElim/uncomputable-backedge-taken-count.ll --- a/llvm/test/Transforms/LoopLoadElim/uncomputable-backedge-taken-count.ll +++ b/llvm/test/Transforms/LoopLoadElim/uncomputable-backedge-taken-count.ll @@ -3,14 +3,17 @@ target datalayout = "e-m:o-i32:64-f80:128-n8:16:32:64-S128" -; TODO ; Make sure loop-load-elimination triggers for a loop with uncomputable ; backedge-taken counts when no runtime checks are required. define void @load_elim_no_runtime_checks(i32* noalias %A, i32* noalias %B, i32* noalias %C, i32 %N) { ; CHECK-LABEL: load_elim_no_runtime_checks ; CHECK-NEXT: entry: +; CHECK-NEXT: %load_initial = load i32, i32* %A, align 1 ; CHECK-NEXT: br label %for.body ; +; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %a_p1, %for.body ] +; CHECK: %c = mul i32 %store_forwarded, 2 +; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll --- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -121,6 +121,15 @@ ; YAML-NEXT: ... ; YAML-NEXT: --- !Analysis ; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: CantVectorizeInstruction +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 } +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: instruction cannot be vectorized +; YAML-NEXT: ... +; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize ; YAML-NEXT: Name: CantComputeNumberOfIterations ; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 } ; YAML-NEXT: Function: test_multiple_failures