Index: lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- lib/Analysis/LoopAccessAnalysis.cpp
+++ lib/Analysis/LoopAccessAnalysis.cpp
@@ -522,9 +522,13 @@
 /// \brief Check whether a pointer can participate in a runtime bounds check.
 static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                 const ValueToValueMap &Strides, Value *Ptr,
-                                Loop *L) {
+                                Loop *L, bool Force) {
   const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+
+  if (!AR && Force)
+    AR = dyn_cast<SCEVAddRecExpr>(PSE.getAsAddRec(Ptr));
+
   if (!AR)
     return false;
 
@@ -550,12 +554,48 @@
   for (auto &AS : AST) {
     int NumReadPtrChecks = 0;
     int NumWritePtrChecks = 0;
+    bool CanDoAliasSetRT = true;
 
     // We assign consecutive id to access from different dependence sets.
     // Accesses within the same set don't need a runtime check.
     unsigned RunningDepId = 1;
     DenseMap<Value *, unsigned> DepSetId;
 
+    SmallVector<MemAccessInfo, 4> Retries;
+
+    auto createCheckForAccess = [&](MemAccessInfo Access, bool Force) {
+      bool IsDepCheckNeeded = isDependencyCheckNeeded();
+      Value *Ptr = Access.getPointer();
+      bool IsWrite = Access.getInt();
+
+      if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Force))
+        return false;
+
+      // When we run after a failing dependency check we have to make sure
+      // we don't have wrapping pointers.
+      if (ShouldCheckStride)
+        if (isStridedPtr(PSE, Ptr, TheLoop, StridesMap, Force) != 1)
+          return false;
+
+      // The id of the dependence set.
+      unsigned DepId;
+
+      if (IsDepCheckNeeded) {
+        Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+        unsigned &LeaderId = DepSetId[Leader];
+        if (!LeaderId)
+          LeaderId = RunningDepId++;
+        DepId = LeaderId;
+      } else
+        // Each access has its own dependence set.
+        DepId = RunningDepId++;
+
+      RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
+      DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+
+      return true;
+    };
+
     for (auto A : AS) {
       Value *Ptr = A.getValue();
       bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
@@ -566,30 +606,10 @@
       else
         ++NumReadPtrChecks;
 
-      if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) &&
-          // When we run after a failing dependency check we have to make sure
-          // we don't have wrapping pointers.
-          (!ShouldCheckStride ||
-           isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) {
-        // The id of the dependence set.
-        unsigned DepId;
-
-        if (IsDepCheckNeeded) {
-          Value *Leader = DepCands.getLeaderValue(Access).getPointer();
-          unsigned &LeaderId = DepSetId[Leader];
-          if (!LeaderId)
-            LeaderId = RunningDepId++;
-          DepId = LeaderId;
-        } else
-          // Each access has its own dependence set.
-          DepId = RunningDepId++;
-
-        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
-
-        DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
-      } else {
+      if (!createCheckForAccess(Access, false)) {
         DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
-        CanDoRT = false;
+        Retries.push_back(Access);
+        CanDoAliasSetRT = false;
       }
     }
 
@@ -601,10 +621,24 @@
     // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
     // for which we couldn't find the bounds but we don't actually need to emit
     // any checks so it does not matter.
-    if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
-      NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
-                                                 NumWritePtrChecks >= 1));
+    bool NeedsCheck = false;
+    if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
+      NeedsCheck = (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
+                                               NumWritePtrChecks >= 1));
+
+    // We need run-time checks, but some of the pointers couldn't be checked.
+    if (NeedsCheck && !CanDoAliasSetRT) {
+      // Reset the CanDoAliasSetRT flag and retry all accesses that failed.
+      CanDoAliasSetRT = true;
+      for (auto Access : Retries)
+        if (!createCheckForAccess(Access, true)) {
+          CanDoAliasSetRT = false;
+          break;
+        }
+    }
+
+    CanDoRT &= CanDoAliasSetRT;
+    NeedRTCheck |= NeedsCheck;
 
     ++ASId;
   }
Index: test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
===================================================================
--- /dev/null
+++ test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
@@ -0,0 +1,173 @@
+; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; When len has a wider type than i, i can overflow in the following kernel:
+;
+;   for (unsigned i = 0; i < len; i++) {
+;     B[i] = A[i] + A[i + 1];
+;   }
+;
+; If accesses to A and B can alias, we need to emit a run-time alias check
+; between A and B. However, when i and i + 1 can wrap, their SCEV expressions
+; are not AddRecs. We need to create SCEV predicates and coerce the
+; expressions to AddRecs in order to be able to emit the run-time alias
+; check.
+;
+; The SCEV expressions for the indices i and i + 1 respectively are:
+;   (sext i32 {0,+,1}<%for.body> to i64)
+;   (sext i32 {1,+,1}<%for.body> to i64)
+;
+; The transformed expressions are:
+;   i64 {0,+,1}<%for.body>
+;   i64 {1,+,1}<%for.body>
+
+; CHECK-LABEL: test1
+; CHECK: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group
+; CHECK-NEXT: %inc.ptr2 = getelementptr inbounds i32, i32* %B, i32 %idx
+; CHECK-NEXT: Against group
+; CHECK-NEXT: %inc.ptr1 = getelementptr inbounds i32, i32* %A, i32 %idx.inc
+; CHECK-NEXT: %inc.ptr0 = getelementptr inbounds i32, i32* %A, i32 %idx
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group
+; CHECK-NEXT: (Low: %B High: (-4 + (4 * (1 smax %len)) + %B))
+; CHECK-NEXT: Member: {%B,+,4}
+; CHECK-NEXT: Group
+; CHECK-NEXT: (Low: %A High: ((4 * (1 smax %len)) + %A))
+; CHECK-NEXT: Member: {(4 + %A),+,4}
+; CHECK-NEXT: Member: {%A,+,4}
+; CHECK: Store to invariant address was not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags:
+; CHECK-NEXT: {1,+,1}<%for.body> Added Flags:
+define void @test1(i32* %A, i32* %B, i64 %len) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %for.body ]
+  %i = phi i64 [ 0, %entry ], [ %add1, %for.body ]
+
+  %idx.ext = zext i32 %idx to i64
+  %idx.ext.next = add i64 %idx.ext, 1
+  %idx.inc = trunc i64 %idx.ext.next to i32
+
+  %inc.ptr0 = getelementptr inbounds i32, i32* %A, i32 %idx
+  %inc.ptr1 = getelementptr inbounds i32, i32* %A, i32 %idx.inc
+
+  %ld1 = load i32, i32* %inc.ptr0, align 4
+  %ld2 = load i32, i32* %inc.ptr1, align 4
+
+  %add = add i32 %ld1, %ld2
+
+  %inc.ptr2 = getelementptr inbounds i32, i32* %B, i32 %idx
+  store i32 %add, i32* %inc.ptr2
+
+  %add1 = add nsw i64 %i, 1
+  %cmp = icmp slt i64 %add1, %len
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; This test is almost the same as the one above, except we zero extend
+; the index.
+;
+; The SCEV expressions for the indices i and i + 1 respectively are:
+;   (zext i32 {0,+,1}<%for.body> to i64)
+;   (zext i32 {1,+,1}<%for.body> to i64)
+
+; CHECK-LABEL: test2
+; CHECK: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group
+; CHECK-NEXT: %inc.ptr2 = getelementptr inbounds i32, i32* %B, i64 %idx.ext
+; CHECK-NEXT: Against group
+; CHECK-NEXT: %inc.ptr1 = getelementptr inbounds i32, i32* %A, i64 %idx.inc.ext
+; CHECK-NEXT: %inc.ptr0 = getelementptr inbounds i32, i32* %A, i64 %idx.ext
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group
+; CHECK-NEXT: (Low: %B High: (-4 + (4 * (1 smax %len)) + %B))
+; CHECK-NEXT: Member: {%B,+,4}
+; CHECK-NEXT: Group
+; CHECK-NEXT: (Low: %A High: ((4 * (1 smax %len)) + %A))
+; CHECK-NEXT: Member: {(4 + %A),+,4}
+; CHECK-NEXT: Member: {%A,+,4}
+; CHECK: Store to invariant address was not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags:
+; CHECK-NEXT: {1,+,1}<%for.body> Added Flags:
+
+define void @test2(i32* %A, i32* %B, i64 %len) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %for.body ]
+  %i = phi i64 [ 0, %entry ], [ %add1, %for.body ]
+
+  %idx.ext = zext i32 %idx to i64
+  %idx.inc = add i32 %idx, 1
+  %idx.inc.ext = zext i32 %idx.inc to i64
+
+  %inc.ptr0 = getelementptr inbounds i32, i32* %A, i64 %idx.ext
+  %inc.ptr1 = getelementptr inbounds i32, i32* %A, i64 %idx.inc.ext
+
+  %ld1 = load i32, i32* %inc.ptr0, align 4
+  %ld2 = load i32, i32* %inc.ptr1, align 4
+
+  %add = add i32 %ld1, %ld2
+
+  %inc.ptr2 = getelementptr inbounds i32, i32* %B, i64 %idx.ext
+  store i32 %add, i32* %inc.ptr2
+
+  %add1 = add nsw i64 %i, 1
+  %cmp = icmp slt i64 %add1, %len
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; When len has a wider type than i, i can overflow in the following kernel:
+;
+;   for (unsigned i = 0; i < len; i++) {
+;     A[i] = A[i] + 1;
+;   }
+;
+; We do need to check that i doesn't wrap, but we don't need a run-time alias
+; check.
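+;
+; No alias check is needed here because the load and the store both go
+; through the same pointer (%inc.ptr), so within each iteration the only
+; dependence is a read from an address followed by a write to that same
+; address.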
+
+; CHECK-LABEL: test3
+; CHECK: Memory dependences are safe
+; CHECK: SCEV assumptions:
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags:
+define void @test3(i32* %A, i64 %len) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %A.idx = phi i32 [ 0, %entry ], [ %A.idx.inc, %for.body ]
+  %i = phi i64 [ 0, %entry ], [ %add1, %for.body ]
+
+  %A.idx.inc = add i32 %A.idx, 1
+  %inc.ptr = getelementptr inbounds i32, i32* %A, i32 %A.idx
+
+  %ld = load i32, i32* %inc.ptr, align 4
+  %add = add i32 %ld, 1
+  store i32 %add, i32* %inc.ptr
+
+  %add1 = add nsw i64 %i, 1
+  %cmp = icmp slt i64 %add1, %len
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
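
For context, here is a minimal standalone C++ sketch (illustrative only, not
part of the patch) of the wrap the tests above exercise: when len is wider
than the 32-bit index, i + 1 eventually wraps, which is why the zero/sign
extensions of {0,+,1} and {1,+,1} are not AddRecs unless the added no-wrap
predicates hold.

// Minimal sketch, not part of the patch: demonstrates the 32-bit index wrap
// that the SCEV no-wrap predicates in the tests above guard against.
#include <cstdint>
#include <cstdio>

int main() {
  // len is wider than the 32-bit induction variable, as in the tests.
  uint64_t len = (1ULL << 32) + 2;

  // State of the induction variable after 2^32 - 1 iterations of
  //   for (unsigned i = 0; i < len; i++) B[i] = A[i] + A[i + 1];
  uint32_t i = UINT32_MAX;
  uint32_t next = i + 1; // wraps to 0, so A[i + 1] would read A[0]

  // The loop condition still holds after promotion to 64 bits, so the
  // 64-bit extension of the index is not a plain recurrence.
  printf("i = %u, i + 1 = %u, loop continues: %d\n", i, next,
         (int)((uint64_t)i < len));
  return 0;
}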