Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Standalone View
llvm/lib/Analysis/ScalarEvolution.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 4,947 Lines • ▼ Show 20 Lines | for (auto *PoisonUser : Poison->users()) { | ||||
} | } | ||||
} | } | ||||
} | } | ||||
} | } | ||||
return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L); | return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L); | ||||
} | } | ||||
bool ScalarEvolution::loopHasNoSideEffects(const Loop *L) { | |||||
auto Itr = LoopHasNoSideEffects.find(L); | |||||
if (Itr == LoopHasNoSideEffects.end()) { | |||||
auto NoSideEffectsInBB = [&](BasicBlock *BB) { | |||||
return all_of(*BB, [](Instruction &I) { | |||||
// Non-atomic, non-volatile stores are ok. | |||||
if (auto *SI = dyn_cast<StoreInst>(&I)) | |||||
sanjoy: No braces needed here. Also, I'd drop the `else`, and just have:
```
if (X)
return Y… | |||||
Not Done ReplyInline ActionsUse auto * to denote that SI is a pointer. sanjoy: Use `auto *` to denote that `SI` is a pointer. | |||||
return SI->isSimple(); | |||||
return !I.mayHaveSideEffects(); | |||||
}); | |||||
}; | |||||
auto InsertPair = LoopHasNoSideEffects.insert( | |||||
{L, all_of(L->getBlocks(), NoSideEffectsInBB)}); | |||||
assert(InsertPair.second && "We just checked!"); | |||||
Itr = InsertPair.first; | |||||
} | |||||
return Itr->second; | |||||
} | |||||
bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) { | bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) { | ||||
auto Itr = LoopHasNoAbnormalExits.find(L); | auto Itr = LoopHasNoAbnormalExits.find(L); | ||||
if (Itr == LoopHasNoAbnormalExits.end()) { | if (Itr == LoopHasNoAbnormalExits.end()) { | ||||
auto NoAbnormalExitInBB = [&](BasicBlock *BB) { | auto NoAbnormalExitInBB = [&](BasicBlock *BB) { | ||||
return all_of(*BB, [](Instruction &I) { | return all_of(*BB, [](Instruction &I) { | ||||
return isGuaranteedToTransferExecutionToSuccessor(&I); | return isGuaranteedToTransferExecutionToSuccessor(&I); | ||||
}); | }); | ||||
}; | }; | ||||
▲ Show 20 Lines • Show All 571 Lines • ▼ Show 20 Lines | void ScalarEvolution::forgetLoop(const Loop *L) { | ||||
} | } | ||||
// Forget all contained loops too, to avoid dangling entries in the | // Forget all contained loops too, to avoid dangling entries in the | ||||
// ValuesAtScopes map. | // ValuesAtScopes map. | ||||
for (Loop *I : *L) | for (Loop *I : *L) | ||||
forgetLoop(I); | forgetLoop(I); | ||||
LoopHasNoAbnormalExits.erase(L); | LoopHasNoAbnormalExits.erase(L); | ||||
LoopHasNoSideEffects.erase(L); | |||||
} | } | ||||
void ScalarEvolution::forgetValue(Value *V) { | void ScalarEvolution::forgetValue(Value *V) { | ||||
Instruction *I = dyn_cast<Instruction>(V); | Instruction *I = dyn_cast<Instruction>(V); | ||||
if (!I) return; | if (!I) return; | ||||
// Drop information about expressions based on loop-header PHIs. | // Drop information about expressions based on loop-header PHIs. | ||||
SmallVector<Instruction *, 16> Worklist; | SmallVector<Instruction *, 16> Worklist; | ||||
▲ Show 20 Lines • Show All 3,126 Lines • ▼ Show 20 Lines | ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, | ||||
const Loop *L, bool IsSigned, | const Loop *L, bool IsSigned, | ||||
bool ControlsExit, bool AllowPredicates) { | bool ControlsExit, bool AllowPredicates) { | ||||
SCEVUnionPredicate P; | SCEVUnionPredicate P; | ||||
// We handle only IV < Invariant | // We handle only IV < Invariant | ||||
if (!isLoopInvariant(RHS, L)) | if (!isLoopInvariant(RHS, L)) | ||||
return getCouldNotCompute(); | return getCouldNotCompute(); | ||||
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); | const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); | ||||
if (!IV && AllowPredicates) | bool PredicatedIV = false; | ||||
if (!IV && AllowPredicates) { | |||||
// Try to make this an AddRec using runtime tests, in the first X | // Try to make this an AddRec using runtime tests, in the first X | ||||
// iterations of this loop, where X is the SCEV expression found by the | // iterations of this loop, where X is the SCEV expression found by the | ||||
// algorithm below. | // algorithm below. | ||||
IV = convertSCEVToAddRecWithPredicates(LHS, L, P); | IV = convertSCEVToAddRecWithPredicates(LHS, L, P); | ||||
PredicatedIV = true; | |||||
} | |||||
// Avoid weird loops | // Avoid weird loops | ||||
if (!IV || IV->getLoop() != L || !IV->isAffine()) | if (!IV || IV->getLoop() != L || !IV->isAffine()) | ||||
return getCouldNotCompute(); | return getCouldNotCompute(); | ||||
bool NoWrap = ControlsExit && | bool NoWrap = ControlsExit && | ||||
IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); | IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); | ||||
const SCEV *Stride = IV->getStepRecurrence(*this); | const SCEV *Stride = IV->getStepRecurrence(*this); | ||||
// Avoid negative or zero stride values | bool PositiveStride = isKnownPositive(Stride); | ||||
Not Done ReplyInline ActionsAs we discussed, it isn't that given the conditions you're testing we can prove that the stride is non-negative, but that the math below is correct even if the stride is negative. Also, I'd separate out the !NoWrap || !loopHasNoAbnormalExits(L) || !loopHasNoSideEffects(L) check into it's own if and have a small comment describing why they're necessary / sufficient (the example you had in the llvm-dev thread would be nice to add too). sanjoy: As we discussed, it isn't that given the conditions you're testing we can prove that the… | |||||
Not Done ReplyInline ActionsThe check for abnormal exits seems redundant now because loopHasNoSideEffects() is more conservative than it. I am removing this check as well. pankajchawla: The check for abnormal exits seems redundant now because loopHasNoSideEffects() is more… | |||||
if (!isKnownPositive(Stride)) | |||||
// Avoid negative or zero stride values. | |||||
if (!PositiveStride) | |||||
// We can compute the correct backedge taken count for loops with unknown | |||||
// strides if we can prove that the loop is not an infinite loop with side | |||||
// effects. Here's the loop structure we are trying to handle - | |||||
// | |||||
nit: "here -" sanjoy: nit: "here -" | |||||
// i = start | |||||
// do { | |||||
// A[i] = i; | |||||
// i += s; | |||||
// } while (i < end); | |||||
// | |||||
// The backedge taken count for such loops is evaluated as - | |||||
// (max(end, start + stride) - start - 1) /u stride | |||||
Not Done ReplyInline ActionsNit: blank before - sanjoy: Nit: blank before `-` | |||||
// | |||||
// The additional preconditions that we need to check to prove correctness | |||||
// of the above formula is as follows - | |||||
// | |||||
// a) IV is either nuw or nsw depending upon signedness (indicated by the | |||||
// NoWrap flag). | |||||
Not Done ReplyInline ActionsI'm not super-comfortable with this -- (IIUC) you're assuming that given an unknown stride, there are certain things SCEV cannot prove. This will create a tricky coupling between different parts and e.g. introduces the possibility that we'll get miscompiles if we make an unrelated part of SCEV smarter in some creative way (like you're doing here :) ). I think it is better to assume that the other bits in SCEV are omniscient (i.e. they may (but are not guaranteed to) prove anything that is factually correct at runtime), and base our inferences on that. The other thing that's missing here is some clear justification of why what you're doing is correct. This does not necessarily have to be a formal proof (though that'd make me really happy :) ) but at least a clear statement of preconditions you've assumed, and why that implies that the trip count of (max(rhs, start) - start + stride - 1) / u stride is correct. sanjoy: I'm not super-comfortable with this -- (IIUC) you're assuming that given an unknown stride… | |||||
Not Done ReplyInline Actions
The only argument I can give here is that no matter how smart SCEV gets, it can only propagate no wrap flags in 'edge cases' when it knows something about the stride (in fact, it may have to know something about all of start, end and stride). So this smartness should then get reflected in either isKnownPositive(stride) or isKnownNonPositive(stride) which this change is guarded under. Please let me know whether this is a satisfactory argument. I would prefer not to spend more time on a patch which is not likely to get accepted. I cannot think of a foolproof way to prune the 'edge cases'. Also, just so you know, scalar evolution is successfully able to compute the trip count for the edge case mentioned in the comments by computing the trip count exhaustively before ever reaching this section of the code.
I am not sure whether you are not convinced that this is correct or you just want this to be documented properly. I have tried to document all the preconditions. I will take a crack at giving the proof in case you are not convinced. First lets define the loop structure we are handling- i = start do { A[i] = i; i += s; } while (i < end); The proposition is that the backedge taken count of this loop for any combination of start, stride and end is: Since I haven't changed the original backedge taken count formula, I assume that you agree that the formula is correct when stride is known to be positive (original behavior of the function). Now, these are our preconditions- a) Comparator is either ICMP_ULT or ICMP_SLT. We can split all the possible scenarios for this loop into these three cases-
(max(end, start + stride) - start - 1) /u stride ==> Hence proved :) pankajchawla: > I'm not super-comfortable with this -- (IIUC) you're assuming that given an unknown stride… | |||||
Not Done ReplyInline Actions
Ideally we should not rely on things like isKnownPositive or isKnownNonPositive being uniformly precise all of the time -- i.e. an implementation that returns false unconditionally every 10th time these are called should be "okay" (terrible, but correct). This isn't just a thought experiment either -- isKnownPositive (since it internally calls getRange) returns a cached result, and the cache for the stride can be cleared independently of the add recurrence using that stride, and by that time the IR (or the cache, actually) could have been modified in a way that SCEV would not be able to re-prove isKnownPositive.
The "assume other parts of SCEV can be omniscient" is an important property, and I'm not okay with losing it without really good reasons. The only way forward here I can think of is to not do this optimization if the comparison is unsigned, since the only counter-example (such that the trip count formula is broken even with the preconditions satisfied) I've managed to produce is with unsigned compares. However, I'm not sure if bailing out on unsigned compares will actually work for the case that motivated this patch.
I was asking for a short comment documenting on why your line of reasoning is correct. sanjoy: > The only argument I can give here is that no matter how smart SCEV gets, it can only… | |||||
Not Done ReplyInline Actions
I am not relying on their preciseness, I am relying on their correctness. They can (and should) conservatively return false which would be fine. Returning false unconditionally every 10th time is not okay, it is clearly incorrect. What if I call it on the same value 10 times? Are you suggesting that it is ok for them to return true the first 9 times and false the 10th time for the same value?
If this is the case, I believe this is an issue with ScalarEvolution's implementation. The cache for the stride cannot be invalidated independently of the add recurrence it occurs in. We should invalidate SCEV for all values dependent on stride like what we do in forgetValue() otherwise ScalarEvolution is internally inconsistent. If we do not do this, the wrap flags applied on the add recurrence can be wrong and can lead to miscompilation. If isKnownPositive() cannot be re-proven, then the previous trip count computation which was made on this assumption is now invalid and therefore should be invalidated as well.
I can agree on the principle but I think you are applying it incorrectly. As a component, ScalarEvolution's different sub-components have to be in 'sync' with each other otherwise no correctness guarantees can be provided by the component as a whole. I am not limiting the omniscience of other parts of SCEV here, I am merely assuming that when they grow stronger, they grow stronger in sync.
The example you provided invalidates both signed and unsigned IVs. This is the IR produced by LLVM for your example- define void @foo(i8* nocapture %A, i32 %n) local_unnamed_addr #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %i.07 = phi i8 [ 127, %entry ], [ %add, %for.body ] %idxprom = zext i8 %i.07 to i64 %arrayidx = getelementptr inbounds i8, i8* %A, i64 %idxprom store i8 %i.07, i8* %arrayidx, align 1 %add = add i8 %i.07, -127 %cmp = icmp sgt i8 %add, -1 br i1 %cmp, label %for.body, label %for.end for.end: ; preds = %for.body ret void } This is the SCEV form of %add- There is already an nsw flag on it, and an nuw flag can be applied as well. So I cannot restrict it to signed IVs.
I will add a short version of the proof in the comments if we can agree that this patch should be committed. pankajchawla: > Ideally we should not rely on things like `isKnownPositive` or `isKnownNonPositive` being… | |||||
Not Done ReplyInline Actions
You're right, I think I'm getting carried away in some excessively rigid chain of reasoning which isn't really applicable here. I'm fine with this change landing as-is (with a short comment informally describing why what you've changed is correct). sanjoy: > I can agree on the principle but I think you are applying it incorrectly. As a component… | |||||
Not Done ReplyInline ActionsThanks! pankajchawla: Thanks!
I have updated the comments. | |||||
// b) loop is single exit with no side effects. | |||||
// | |||||
// | |||||
// Precondition a) implies that if the stride is negative, this is a single | |||||
// trip loop. The backedge taken count formula reduces to zero in this case. | |||||
// | |||||
// Precondition b) implies that the unknown stride cannot be zero otherwise | |||||
// we have UB. | |||||
// | |||||
// The positive stride case is the same as isKnownPositive(Stride) returning | |||||
// true (original behavior of the function). | |||||
// | |||||
// We want to make sure that the stride is truly unknown as there are edge | |||||
// cases where ScalarEvolution propagates no wrap flags to the | |||||
// post-increment/decrement IV even though the increment/decrement operation | |||||
// itself is wrapping. The computed backedge taken count may be wrong in | |||||
// such cases. This is prevented by checking that the stride is not known to | |||||
// be either positive or non-positive. For example, no wrap flags are | |||||
// propagated to the post-increment IV of this loop with a trip count of 2 - | |||||
// | |||||
// unsigned char i; | |||||
// for(i=127; i<128; i+=129) | |||||
// A[i] = i; | |||||
// | |||||
if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) || | |||||
!loopHasNoSideEffects(L)) | |||||
return getCouldNotCompute(); | return getCouldNotCompute(); | ||||
// Avoid proven overflow cases: this will ensure that the backedge taken count | // Avoid proven overflow cases: this will ensure that the backedge taken count | ||||
// will not generate any unsigned overflow. Relaxed no-overflow conditions | // will not generate any unsigned overflow. Relaxed no-overflow conditions | ||||
// exploit NoWrapFlags, allowing to optimize in presence of undefined | // exploit NoWrapFlags, allowing to optimize in presence of undefined | ||||
// behaviors like the case of C language. | // behaviors like the case of C language. | ||||
if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) | if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) | ||||
sanjoyUnsubmitted Not Done ReplyInline ActionsI just noticed that doesIVOverflowOnLT only works for a positive stride, can you please take a look? For a non-positive Stride, I think we can make doesIVOverflowOnLT just return NoWrap. sanjoy: I just noticed that `doesIVOverflowOnLT` only works for a positive stride, can you please take… | |||||
return getCouldNotCompute(); | return getCouldNotCompute(); | ||||
Not Done ReplyInline ActionsThe comments for this function imply that it expects a positive stride- I think this makes sense because the condition in the loop latch is of the form (i < N). I added an assert to the function verifying that the incoming stride is indeed positive. This check seems to be useless for unknown strides so I skip it. pankajchawla: The comments for this function imply that it expects a positive stride-
`Verify if an linear IV… | |||||
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT | ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT | ||||
: ICmpInst::ICMP_ULT; | : ICmpInst::ICMP_ULT; | ||||
const SCEV *Start = IV->getStart(); | const SCEV *Start = IV->getStart(); | ||||
const SCEV *End = RHS; | const SCEV *End = RHS; | ||||
if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) | if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) | ||||
End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); | End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); | ||||
Not Done ReplyInline ActionsI don't follow why this check is necessary. eli.friedman: I don't follow why this check is necessary. | |||||
Not Done ReplyInline ActionsSorry, this is my first time using Phabricator. pankajchawla: Sorry, this is my first time using Phabricator.
I replied to this in my non-inline comment. | |||||
const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); | const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); | ||||
Not Done ReplyInline ActionsHere and in the comment in the test case, we're not assuming the stride is positive, but that the trip count math is correct even if stride is negative given that the conditions we checked above are correct; so I think the comment and the variable name needs to change. sanjoy: Here and in the comment in the test case, we're not assuming the stride is positive, but that… | |||||
Not Done ReplyInline ActionsI think this check is unnecessary as the computed backedge taken count is correct for single trip do-while loops as well. For a do-while loop like this- int i = init; do { A[i] = i; i += s; } while (i<n); The computed backedge taken count is: ((-1 + (-1 * %init) + ((%init + %s) smax %n)) /u %s) I will remove this check. pankajchawla: I think this check is unnecessary as the computed backedge taken count is correct for single… | |||||
APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() | APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() | ||||
: getUnsignedRange(Start).getUnsignedMin(); | : getUnsignedRange(Start).getUnsignedMin(); | ||||
APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() | unsigned BitWidth = getTypeSizeInBits(LHS->getType()); | ||||
// Using a minimum stride of 1 is safe when computing max backedge taken count | |||||
// for a loop with unknown stride. | |||||
APInt MinStride = !PositiveStride | |||||
sanjoyUnsubmitted Not Done ReplyInline ActionsI'd feel safer if you short-circuited the entire max becount computation based on PositiveStride, since MinStride is no longer what it says on the tin ("the minimum value of Stride). (To me) it basically looks like the expression below that computes the max be count implicitly assumes a positive stride, and forking the logic more explicitly will be more obvious. If that's a problem, I'd suggest at least renaming MinStride to something more in tune with what it represents now (perhaps StrideForMaxBECount?). sanjoy: I'd feel safer if you short-circuited the entire max becount computation based on… | |||||
? APInt(BitWidth, 1, IsSigned) | |||||
: IsSigned ? getSignedRange(Stride).getSignedMin() | |||||
: getUnsignedRange(Stride).getUnsignedMin(); | : getUnsignedRange(Stride).getUnsignedMin(); | ||||
unsigned BitWidth = getTypeSizeInBits(LHS->getType()); | |||||
APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1) | APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1) | ||||
: APInt::getMaxValue(BitWidth) - (MinStride - 1); | : APInt::getMaxValue(BitWidth) - (MinStride - 1); | ||||
// Although End can be a MAX expression we estimate MaxEnd considering only | // Although End can be a MAX expression we estimate MaxEnd considering only | ||||
// the case End = RHS. This is safe because in the other case (End - Start) | // the case End = RHS. This is safe because in the other case (End - Start) | ||||
Not Done ReplyInline ActionsI renamed MinStride to StrideForMaxBECount. I would have had to duplicate code to completely separate max backedge count computation for the two cases. pankajchawla: I renamed `MinStride` to `StrideForMaxBECount`. I would have had to duplicate code to… | |||||
// is zero, leading to a zero maximum backedge taken count. | // is zero, leading to a zero maximum backedge taken count. | ||||
APInt MaxEnd = | APInt MaxEnd = | ||||
Not Done ReplyInline ActionsIf it's all the same to you, I'd suggest avoiding a nested ternary and using an if instead. sanjoy: If it's all the same to you, I'd suggest avoiding a nested ternary and using an `if` instead. | |||||
Not Done ReplyInline ActionsI will change it to an if. pankajchawla: I will change it to an if. | |||||
IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) | IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) | ||||
: APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); | : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); | ||||
const SCEV *MaxBECount; | const SCEV *MaxBECount; | ||||
if (isa<SCEVConstant>(BECount)) | if (isa<SCEVConstant>(BECount)) | ||||
MaxBECount = BECount; | MaxBECount = BECount; | ||||
else | else | ||||
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), | MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), | ||||
▲ Show 20 Lines • Show All 1,761 Lines • Show Last 20 Lines |
No braces needed here. Also, I'd drop the else, and just have: