diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -663,18 +663,6 @@ bool Assume) { Value *Ptr = Access.getPointer(); - if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume)) - return false; - - // When we run after a failing dependency check we have to make sure - // we don't have wrapping pointers. - if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) { - auto *Expr = PSE.getSCEV(Ptr); - if (!Assume || !isa<SCEVAddRecExpr>(Expr)) - return false; - PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); - } - // The id of the dependence set. unsigned DepId; @@ -688,6 +676,18 @@ // Each access has its own dependence set. DepId = RunningDepId++; + if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume)) + return false; + + // When we run after a failing dependency check we have to make sure + // we don't have wrapping pointers. + if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) { + auto *Expr = PSE.getSCEV(Ptr); + if (!Assume || !isa<SCEVAddRecExpr>(Expr)) + return false; + PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); + } + bool IsWrite = Access.getInt(); RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); @@ -772,7 +772,7 @@ // dependence sets (in which case RunningDepId > 2) or if we need to re-try // any bound checks (because in that case the number of dependence sets is // incomplete). - bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty(); + bool NeedsAliasSetRTCheck = RunningDepId > 2; // We need to perform run-time alias checks, but some pointers had bounds // that couldn't be checked. 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll @@ -43,15 +43,20 @@ ret void } -; TODO: We cannot compute the bound for %arrayidxA_ub, because the index is +; We cannot compute the bound for %arrayidxA_ub, because the index is ; loaded on each iteration. As %a and %b are no-alias, no memchecks are required ; and unknown bounds should not prevent further analysis. define void @loaded_bound(i16* noalias %a, i16* noalias %b) { ; CHECK-LABEL: loaded_bound ; CHECK-NEXT: for.body: -; CHECK-NEXT: Report: cannot identify array bounds -; CHECK-NEXT: Dependences: -; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NOT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %loadA_ub = load i16, i16* %arrayidxA_ub, align 2 -> +; CHECK-NEXT: store i16 %mul, i16* %arrayidxA, align 2 +; CHECK: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll --- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll @@ -11,20 +11,20 @@ ; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 undef) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = 
icmp ult i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 1, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 1, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[TMP2]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP1]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP0]], [[TMP6]] ; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 undef) ; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = add i32 1, [[MUL_RESULT2]] -; CHECK-NEXT: [[TMP9:%.*]] = sub i32 1, [[MUL_RESULT2]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i32 [[TMP9]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP8]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 0, [[MUL_RESULT2]] +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 0, [[MUL_RESULT2]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i32 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP8]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = select i1 false, i1 [[TMP10]], i1 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW3]] ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP7]], [[TMP13]] @@ -52,7 +52,7 @@ ; CHECK-NEXT: store <4 x float> [[WIDE_LOAD]], <4 x float>* [[TMP29]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -73,7 +73,7 @@ ; CHECK-NEXT: [[ARRAYIDX1209:%.*]] = getelementptr inbounds float, float* [[A]], i64 
[[IDX_2]] ; CHECK-NEXT: store float [[LV]], float* [[ARRAYIDX1209]], align 4 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], undef -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ;