Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -522,6 +522,21 @@ Accesses.insert(MemAccessInfo(Ptr, true)); } + /// \brief Check if we can emit a run-time no-alias check for \p Access. + /// + /// Returns true if we can emit a run-time no-alias check for \p Access. + /// If we can check this access, this also adds it to a dependence set and + /// adds a run-time check for it to \p RtCheck. If \p Assume is true, + /// we will attempt to use additional run-time checks in order to get + /// the bounds of the pointer. + bool createCheckForAccess(RuntimePointerChecking &RtCheck, + MemAccessInfo Access, + const ValueToValueMap &Strides, + DenseMap<Value *, unsigned> &DepSetId, + Loop *TheLoop, unsigned &RunningDepId, + unsigned ASId, bool ShouldCheckWrap, + bool Assume); + /// \brief Check whether we can check the pointers at runtime for /// non-intersection. /// @@ -599,7 +614,7 @@ /// \brief Check whether a pointer can participate in a runtime bounds check. static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, - Loop *L) { + Loop *L, bool Assume) { const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); // The bounds for loop-invariant pointer is trivial. 
@@ -607,6 +622,10 @@ return true; const SCEVAddRecExpr *AR = dyn_cast(PtrScev); + + if (!AR && Assume) + AR = PSE.getAsAddRec(Ptr); + if (!AR) return false; @@ -621,9 +640,54 @@ return true; int64_t Stride = getPtrStride(PSE, Ptr, L, Strides); - return Stride == 1; + if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW)) + return true; + + return false; } +bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, + MemAccessInfo Access, + const ValueToValueMap &StridesMap, + DenseMap &DepSetId, + Loop *TheLoop, unsigned &RunningDepId, + unsigned ASId, bool ShouldCheckWrap, + bool Assume) { + bool IsDepCheckNeeded = isDependencyCheckNeeded(); + Value *Ptr = Access.getPointer(); + bool IsWrite = Access.getInt(); + + if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume)) + return false; + + // When we run after a failing dependency check we have to make sure + // we don't have wrapping pointers. + if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) { + auto *Expr = PSE.getSCEV(Ptr); + if (!Assume || !isa(Expr)) + return false; + PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); + } + + // The id of the dependence set. + unsigned DepId; + + if (IsDepCheckNeeded) { + Value *Leader = DepCands.getLeaderValue(Access).getPointer(); + unsigned &LeaderId = DepSetId[Leader]; + if (!LeaderId) + LeaderId = RunningDepId++; + DepId = LeaderId; + } else + // Each access has its own dependence set. 
+ DepId = RunningDepId++; + + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); + DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); + + return true; + } + bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, @@ -643,12 +707,15 @@ for (auto &AS : AST) { int NumReadPtrChecks = 0; int NumWritePtrChecks = 0; + bool CanDoAliasSetRT = true; // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. unsigned RunningDepId = 1; DenseMap DepSetId; + SmallVector Retries; + for (auto A : AS) { Value *Ptr = A.getValue(); bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); @@ -659,29 +726,11 @@ else ++NumReadPtrChecks; - if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) && - // When we run after a failing dependency check we have to make sure - // we don't have wrapping pointers. - (!ShouldCheckWrap || isNoWrap(PSE, StridesMap, Ptr, TheLoop))) { - // The id of the dependence set. - unsigned DepId; - - if (IsDepCheckNeeded) { - Value *Leader = DepCands.getLeaderValue(Access).getPointer(); - unsigned &LeaderId = DepSetId[Leader]; - if (!LeaderId) - LeaderId = RunningDepId++; - DepId = LeaderId; - } else - // Each access has its own dependence set. 
- DepId = RunningDepId++; - - RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); - - DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); - } else { + if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, + RunningDepId, ASId, ShouldCheckWrap, false)) { DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n'); - CanDoRT = false; + Retries.push_back(Access); + CanDoAliasSetRT = false; } } @@ -693,10 +742,27 @@ // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer // for which we couldn't find the bounds but we don't actually need to emit // any checks so it does not matter. - if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2)) - NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 && - NumWritePtrChecks >= 1)); + bool NeedsCheck = false; + if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) + NeedsCheck = (NumWritePtrChecks >= 2 || + (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1)); + + // We need to perform run-time alias checks, but some pointers had bounds + // that couldn't be checked. + if (NeedsCheck && !CanDoAliasSetRT) { + // Reset the CanDoAliasSetRT flag and retry all accesses that have failed. 
+ CanDoAliasSetRT = true; + for (auto Access : Retries) + if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, + TheLoop, RunningDepId, ASId, + ShouldCheckWrap, true)) { + CanDoAliasSetRT = false; + break; + } + } + CanDoRT &= CanDoAliasSetRT; + NeedRTCheck |= NeedsCheck; ++ASId; } Index: test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll =================================================================== --- /dev/null +++ test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll @@ -0,0 +1,107 @@ +; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +; i and i + 1 can overflow in the following kernel: +; void test1(unsigned long long x, int *a, int *b) { +; for (unsigned i = 0; i < x; ++i) +; b[i] = a[i+1] + 1; +; } +; +; If accesses to a and b can alias, we need to emit a run-time alias check +; between accesses to a and b. However, when i and i + 1 can wrap, their +; SCEV expression is not an AddRec. We need to create SCEV predicates and +; coerce the expressions to AddRecs in order to be able to emit the run-time +; alias check. +; +; The accesses at a[i+1] and b[i] correspond to the addresses %arrayidx and +; %arrayidx4 in the test. 
The SCEV expressions for these are: +; ((4 * (zext i32 {1,+,1}<%for.body> to i64)) + %a) +; ((4 * (zext i32 {0,+,1}<%for.body> to i64)) + %b) +; +; The transformed expressions are: +; i64 {(4 + %a),+,4}<%for.body> +; i64 {%b,+,4}<%for.body> + +; CHECK-LABEL: test1 +; CHECK: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom +; CHECK-NEXT: Against group +; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: (4 + %a) High: (4 + (4 * (1 umax %x)) + %a)) +; CHECK-NEXT: Member: {(4 + %a),+,4}<%for.body> +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %b High: ((4 * (1 umax %x)) + %b)) +; CHECK-NEXT: Member: {%b,+,4}<%for.body> +; CHECK: Store to invariant address was not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw> +; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw> +; CHECK: Expressions re-written: +; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom: +; CHECK-NEXT: ((4 * (zext i32 {1,+,1}<%for.body> to i64)) + %a) +; CHECK-NEXT: --> {(4 + %a),+,4}<%for.body> +; CHECK-NEXT: [PSE] %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11: +; CHECK-NEXT: ((4 * (zext i32 {0,+,1}<%for.body> to i64)) + %b) +; CHECK-NEXT: --> {%b,+,4}<%for.body> +define void @test1(i64 %x, i32* %a, i32* %b) { +entry: + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %conv11 = phi i64 [ %conv, %for.body ], [ 0, %entry ] + %i.010 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %add = add i32 %i.010, 1 + %idxprom = zext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom + %ld = load i32, i32* %arrayidx, align 4 + %add2 = add nsw i32 %ld, 1 + %arrayidx4 = getelementptr 
inbounds i32, i32* %b, i64 %conv11 + store i32 %add2, i32* %arrayidx4, align 4 + %conv = zext i32 %add to i64 + %cmp = icmp ult i64 %conv, %x + br i1 %cmp, label %for.body, label %exit + +exit: + ret void +} + +; i can overflow in the following kernel: +; void test2(unsigned long long x, int *a) { +; for (unsigned i = 0; i < x; ++i) +; a[i] = a[i] + 1; +; } +; +; We need to check that i doesn't wrap, but we don't need a run-time alias +; check. We also need an extra no-wrap check to get the backedge taken count. + +; CHECK-LABEL: test2 +; CHECK: Memory dependences are safe +; CHECK: SCEV assumptions: +; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw> +; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw> +define void @test2(i64 %x, i32* %a) { +entry: + br label %for.body + +for.body: + %conv11 = phi i64 [ %conv, %for.body ], [ 0, %entry ] + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %conv11 + %ld = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %ld, 1 + store i32 %add, i32* %arrayidx, align 4 + %inc = add i32 %i.010, 1 + %conv = zext i32 %inc to i64 + %cmp = icmp ult i64 %conv, %x + br i1 %cmp, label %for.body, label %exit + +exit: + ret void +}