Index: llvm/trunk/docs/LangRef.rst
===================================================================
--- llvm/trunk/docs/LangRef.rst
+++ llvm/trunk/docs/LangRef.rst
@@ -1753,6 +1753,15 @@
 generated code. For more details, see :ref:`GC Transitions
 <gc_transition_args>`.
 
+Affected Operand Bundles
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Affected operand bundles are characterized by the ``"affected"`` operand bundle
+tag. These operand bundles indicate that a call, specifically a call to an
+intrinsic like ``llvm.assume``, implies some additional knowledge about the
+values within the bundle. This enables the optimizer to efficiently find these
+relationships. The optimizer will add these automatically.
+
 .. _moduleasm:
 
 Module-Level Inline Assembly
Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
@@ -532,6 +532,10 @@
   ///
   ValueExprMapType ValueExprMap;
 
+  /// This is a map of SCEVs to intrinsics (e.g. assumptions) that might affect
+  /// (i.e. imply something about) them.
+  DenseMap<const SCEV *, SmallPtrSet<Instruction *, 4>> AffectedMap;
+
   /// Mark predicate values currently being processed by isImpliedCond.
   SmallPtrSet<Value *, 6> PendingLoopPredicates;
@@ -800,6 +804,9 @@
   ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop,
                                      const SCEV *MaxBECount, unsigned BitWidth);
 
+  /// Add this SCEV to the AffectedMap if its operands are in the AffectedMap.
+  void addAffectedFromOperands(const SCEV *S);
+
   /// We know that there is no SCEV for the specified value.  Analyze the
   /// expression.
   const SCEV *createSCEV(Value *V);
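Reader's note (not part of the patch): the LangRef hunk above describes the bundle only abstractly. In IR, an annotated assumption looks like this (value names are illustrative; as the InstCombineCalls.cpp change below shows, the optimizer attaches the bundle itself):

  %cmp = icmp sgt i32 %x, 0
  call void @llvm.assume(i1 %cmp) [ "affected"(i32 %x) ]

Because operand-bundle operands are ordinary uses, the assume now appears in the use list of %x, which is what lets the analyses changed below find relevant assumptions by walking a value's users instead of scanning every assumption in the function.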
Index: llvm/trunk/lib/Analysis/CodeMetrics.cpp
===================================================================
--- llvm/trunk/lib/Analysis/CodeMetrics.cpp
+++ llvm/trunk/lib/Analysis/CodeMetrics.cpp
@@ -76,20 +76,12 @@
   SmallPtrSet<const Value *, 32> Visited;
   SmallVector<const Value *, 16> Worklist;
 
-  for (auto &AssumeVH : AC->assumptions()) {
-    if (!AssumeVH)
-      continue;
-    Instruction *I = cast<Instruction>(AssumeVH);
-
-    // Filter out call sites outside of the loop so we don't do a function's
-    // worth of work for each of its loops (and, in the common case, ephemeral
-    // values in the loop are likely due to @llvm.assume calls in the loop).
-    if (!L->contains(I->getParent()))
-      continue;
-
-    if (EphValues.insert(I).second)
-      appendSpeculatableOperands(I, Visited, Worklist);
-  }
+  for (auto &B : L->blocks())
+    for (auto &I : *B)
+      if (auto *II = dyn_cast<IntrinsicInst>(&I))
+        if (II->getIntrinsicID() == Intrinsic::assume &&
+            EphValues.insert(II).second)
+          appendSpeculatableOperands(II, Visited, Worklist);
 
   completeEphemeralValues(Visited, Worklist, EphValues);
 }
@@ -100,16 +92,12 @@
   SmallPtrSet<const Value *, 32> Visited;
   SmallVector<const Value *, 16> Worklist;
 
-  for (auto &AssumeVH : AC->assumptions()) {
-    if (!AssumeVH)
-      continue;
-    Instruction *I = cast<Instruction>(AssumeVH);
-    assert(I->getParent()->getParent() == F &&
-           "Found assumption for the wrong function!");
-
-    if (EphValues.insert(I).second)
-      appendSpeculatableOperands(I, Visited, Worklist);
-  }
+  for (auto &B : *F)
+    for (auto &I : B)
+      if (auto *II = dyn_cast<IntrinsicInst>(&I))
+        if (II->getIntrinsicID() == Intrinsic::assume &&
+            EphValues.insert(II).second)
+          appendSpeculatableOperands(II, Visited, Worklist);
 
   completeEphemeralValues(Visited, Worklist, EphValues);
 }
Index: llvm/trunk/lib/Analysis/LazyValueInfo.cpp
===================================================================
--- llvm/trunk/lib/Analysis/LazyValueInfo.cpp
+++ llvm/trunk/lib/Analysis/LazyValueInfo.cpp
@@ -924,14 +924,16 @@
   if (!BBI)
     return;
 
-  for (auto &AssumeVH : AC->assumptions()) {
-    if (!AssumeVH)
+  for (auto *U : Val->users()) {
+    auto *II = dyn_cast<IntrinsicInst>(U);
+    if (!II)
       continue;
-    auto *I = cast<CallInst>(AssumeVH);
-    if (!isValidAssumeForContext(I, BBI, DT))
+    if (II->getIntrinsicID() != Intrinsic::assume)
+      continue;
+    if (!isValidAssumeForContext(II, BBI, DT))
       continue;
 
-    BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0)));
+    BBLV = intersect(BBLV, getValueFromCondition(Val, II->getArgOperand(0)));
   }
 
   // If guards are not used in the module, don't spend time looking for them
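Both hunks above replace AssumptionCache iteration with the same users()-based discovery. A self-contained sketch of the idiom, using a hypothetical helper name (assuming the usual LLVM headers; this is not code from the patch):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/IntrinsicInst.h"
  using namespace llvm;

  // Collect the @llvm.assume calls that use V, either as their condition or
  // through an "affected" operand bundle (bundle operands are uses of V too).
  static SmallVector<IntrinsicInst *, 4> assumesAbout(Value *V) {
    SmallVector<IntrinsicInst *, 4> Assumes;
    for (User *U : V->users())
      if (auto *II = dyn_cast<IntrinsicInst>(U))
        if (II->getIntrinsicID() == Intrinsic::assume)
          Assumes.push_back(II);
    return Assumes;
  }

This costs O(uses of V) per query rather than O(assumptions in the function), which is the efficiency claim made in the LangRef text.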
Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp
+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp
@@ -1212,6 +1212,7 @@
   SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
                                                  Op, Ty);
   UniqueSCEVs.InsertNode(S, IP);
+  addAffectedFromOperands(S);
   return S;
 }
 
@@ -1598,7 +1599,7 @@
   // these to prove lack of overflow.  Use this fact to avoid
   // doing extra work that may not pay off.
   if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
-      !AC.assumptions().empty()) {
+      !AffectedMap.empty()) {
     // If the backedge is guarded by a comparison with the pre-inc
     // value the addrec is safe. Also, if the entry is guarded by
     // a comparison with the start value and the backedge is
@@ -1664,6 +1665,7 @@
   SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
                                                    Op, Ty);
   UniqueSCEVs.InsertNode(S, IP);
+  addAffectedFromOperands(S);
   return S;
 }
 
@@ -1833,7 +1835,7 @@
   // doing extra work that may not pay off.
   if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
-      !AC.assumptions().empty()) {
+      !AffectedMap.empty()) {
     // If the backedge is guarded by a comparison with the pre-inc
     // value the addrec is safe. Also, if the entry is guarded by
     // a comparison with the start value and the backedge is
@@ -1891,6 +1893,7 @@
   SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
                                                    Op, Ty);
   UniqueSCEVs.InsertNode(S, IP);
+  addAffectedFromOperands(S);
   return S;
 }
 
@@ -2444,6 +2447,7 @@
     S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
                                         O, Ops.size());
     UniqueSCEVs.InsertNode(S, IP);
+    addAffectedFromOperands(S);
   }
   S->setNoWrapFlags(Flags);
   return S;
@@ -2736,6 +2740,7 @@
     S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
                                         O, Ops.size());
     UniqueSCEVs.InsertNode(S, IP);
+    addAffectedFromOperands(S);
   }
   S->setNoWrapFlags(Flags);
   return S;
@@ -2856,6 +2861,7 @@
   SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
                                              LHS, RHS);
   UniqueSCEVs.InsertNode(S, IP);
+  addAffectedFromOperands(S);
   return S;
 }
 
@@ -3036,6 +3042,7 @@
     S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
                                            O, Operands.size(), L);
     UniqueSCEVs.InsertNode(S, IP);
+    addAffectedFromOperands(S);
   }
   S->setNoWrapFlags(Flags);
   return S;
@@ -3191,6 +3198,7 @@
   SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
                                              O, Ops.size());
   UniqueSCEVs.InsertNode(S, IP);
+  addAffectedFromOperands(S);
   return S;
 }
 
@@ -3292,6 +3300,7 @@
   SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
                                              O, Ops.size());
   UniqueSCEVs.InsertNode(S, IP);
+  addAffectedFromOperands(S);
   return S;
 }
 
@@ -3492,9 +3501,38 @@
       ExprValueMap[Stripped].insert({V, Offset});
     }
   }
+
+  // If this value is an instruction or an argument, and might be affected by
+  // an assumption, add its SCEV to the AffectedMap.
+  if (isa<Instruction>(V) || isa<Argument>(V)) {
+    for (auto *U : V->users()) {
+      auto *II = dyn_cast<IntrinsicInst>(U);
+      if (!II)
+        continue;
+      if (II->getIntrinsicID() != Intrinsic::assume)
+        continue;
+
+      AffectedMap[S].insert(II);
+    }
+  }
+
   return S;
 }
 
+// If one of this SCEV's operands is in the AffectedMap (meaning that it might
+// be affected by an assumption), then this SCEV might be affected by the same
+// assumption.
+void ScalarEvolution::addAffectedFromOperands(const SCEV *S) {
+  if (auto *NS = dyn_cast<SCEVNAryExpr>(S))
+    for (auto *Op : NS->operands()) {
+      auto AMI = AffectedMap.find(Op);
+      if (AMI == AffectedMap.end())
+        continue;
+
+      AffectedMap[S].insert(AMI->second.begin(), AMI->second.end());
+    }
+}
+
 const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
 
@@ -7926,16 +7964,23 @@
   }
 
   // Check conditions due to any @llvm.assume intrinsics.
-  for (auto &AssumeVH : AC.assumptions()) {
-    if (!AssumeVH)
-      continue;
-    auto *CI = cast<CallInst>(AssumeVH);
-    if (!DT.dominates(CI, Latch->getTerminator()))
-      continue;
+  auto CheckAssumptions = [&](const SCEV *S) {
+    auto AMI = AffectedMap.find(S);
+    if (AMI != AffectedMap.end())
+      for (auto *Assume : AMI->second) {
+        auto *CI = cast<CallInst>(Assume);
+        if (!DT.dominates(CI, Latch->getTerminator()))
+          continue;
 
-    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
-      return true;
-  }
+        if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+          return true;
+      }
+
+    return false;
+  };
+
+  if (CheckAssumptions(LHS) || CheckAssumptions(RHS))
+    return true;
 
   // If the loop is not reachable from the entry block, we risk running into an
   // infinite loop as we walk up into the dom tree. These loops do not matter
@@ -8020,16 +8065,23 @@
   }
 
   // Check conditions due to any @llvm.assume intrinsics.
-  for (auto &AssumeVH : AC.assumptions()) {
-    if (!AssumeVH)
-      continue;
-    auto *CI = cast<CallInst>(AssumeVH);
-    if (!DT.dominates(CI, L->getHeader()))
-      continue;
+  auto CheckAssumptions = [&](const SCEV *S) {
+    auto AMI = AffectedMap.find(S);
+    if (AMI != AffectedMap.end())
+      for (auto *Assume : AMI->second) {
+        auto *CI = cast<CallInst>(Assume);
+        if (!DT.dominates(CI, L->getHeader()))
+          continue;
 
-    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
-      return true;
-  }
+        if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+          return true;
+      }
+
+    return false;
+  };
+
+  if (CheckAssumptions(LHS) || CheckAssumptions(RHS))
+    return true;
 
   return false;
 }
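To make the AffectedMap plumbing above concrete, consider this sketch (value names illustrative):

  %cmp = icmp sgt i32 %n, 0
  call void @llvm.assume(i1 %cmp) [ "affected"(i32 %n) ]
  %m = add nsw i32 %n, 1

getSCEV(%n) finds the assume among %n's users and records it under %n's SCEV; when the add expression for %m is created, addAffectedFromOperands copies that entry over to the new SCEV. The guard predicates above (isLoopBackedgeGuardedByCond and isLoopEntryGuardedByCond) can then look up LHS and RHS in the map directly instead of testing every assumption in the function.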
Index: llvm/trunk/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/trunk/lib/Analysis/ValueTracking.cpp
+++ llvm/trunk/lib/Analysis/ValueTracking.cpp
@@ -526,31 +526,28 @@
   unsigned BitWidth = KnownZero.getBitWidth();
 
-  for (auto &AssumeVH : Q.AC->assumptions()) {
-    if (!AssumeVH)
+  for (auto *U : V->users()) {
+    auto *II = dyn_cast<IntrinsicInst>(U);
+    if (!II)
       continue;
-    CallInst *I = cast<CallInst>(AssumeVH);
-    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
-           "Got assumption for the wrong function!");
-    if (Q.isExcluded(I))
+    if (II->getIntrinsicID() != Intrinsic::assume)
+      continue;
+    if (Q.isExcluded(II))
       continue;
 
-    // Warning: This loop can end up being somewhat performance sensetive.
-    // We're running this loop for once for each value queried resulting in a
-    // runtime of ~O(#assumes * #values).
-
-    assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
-           "must be an assume intrinsic");
-
-    Value *Arg = I->getArgOperand(0);
+    Value *Arg = II->getArgOperand(0);
 
-    if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+    if (Arg == V && isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
       assert(BitWidth == 1 && "assume operand is not i1?");
       KnownZero.clearAllBits();
       KnownOne.setAllBits();
       return;
     }
 
+    // Note that the patterns below need to be kept in sync with the code
+    // in InstCombiner::visitCallInst that adds relevant values to each
+    // assume's operand bundles.
+
     // The remaining tests are all recursive, so bail out if we hit the limit.
     if (Depth == MaxDepth)
       continue;
@@ -564,20 +561,20 @@
     ConstantInt *C;
     // assume(v = a)
     if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
-        Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
       APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-      computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+      computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
       KnownZero |= RHSKnownZero;
       KnownOne  |= RHSKnownOne;
     // assume(v & b = a)
     } else if (match(Arg,
                      m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
-              isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+              isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
       APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-      computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+      computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
       APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
-      computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I));
+      computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, II));
 
       // For those bits in the mask that are known to be one, we can propagate
       // known bits from the RHS to V.
@@ -587,11 +584,11 @@
     } else if (match(Arg,
                      m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
                               m_Value(A))) &&
              Pred == ICmpInst::ICMP_EQ &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
      APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
-     computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, II));
 
      // For those bits in the mask that are known to be one, we can propagate
      // inverted known bits from the RHS to V.
@@ -601,11 +598,11 @@
     } else if (match(Arg,
                      m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
              Pred == ICmpInst::ICMP_EQ &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
      APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
-     computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, II));
 
      // For those bits in B that are known to be zero, we can propagate known
      // bits from the RHS to V.
@@ -615,11 +612,11 @@
     } else if (match(Arg,
                      m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
                               m_Value(A))) &&
              Pred == ICmpInst::ICMP_EQ &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
      APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
-     computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, II));
 
      // For those bits in B that are known to be zero, we can propagate
      // inverted known bits from the RHS to V.
@@ -629,11 +626,11 @@
     } else if (match(Arg,
                      m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
              Pred == ICmpInst::ICMP_EQ &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
      APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
-     computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, II));
 
      // For those bits in B that are known to be zero, we can propagate known
      // bits from the RHS to V. For those bits in B that are known to be one,
@@ -646,11 +643,11 @@
     } else if (match(Arg,
                      m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
                               m_Value(A))) &&
              Pred == ICmpInst::ICMP_EQ &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
      APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
-     computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, II));
 
      // For those bits in B that are known to be zero, we can propagate
      // inverted known bits from the RHS to V. For those bits in B that are
@@ -663,9 +660,9 @@
     } else if (match(Arg,
                      m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
                               m_Value(A))) &&
              Pred == ICmpInst::ICMP_EQ &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
      // For those bits in RHS that are known, we can propagate them to known
      // bits in V shifted to the right by C.
      KnownZero |= RHSKnownZero.lshr(C->getZExtValue());
@@ -674,9 +671,9 @@
     } else if (match(Arg,
                      m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
                               m_Value(A))) &&
              Pred == ICmpInst::ICMP_EQ &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
      // For those bits in RHS that are known, we can propagate them inverted
      // to known bits in V shifted to the right by C.
      KnownZero |= RHSKnownOne.lshr(C->getZExtValue());
@@ -687,9 +684,9 @@
                                      m_AShr(m_V, m_ConstantInt(C))),
                      m_Value(A))) &&
              Pred == ICmpInst::ICMP_EQ &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
      // For those bits in RHS that are known, we can propagate them to known
      // bits in V shifted to the right by C.
      KnownZero |= RHSKnownZero << C->getZExtValue();
@@ -700,9 +697,9 @@
                                        m_AShr(m_V, m_ConstantInt(C)))),
                      m_Value(A))) &&
              Pred == ICmpInst::ICMP_EQ &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
      // For those bits in RHS that are known, we can propagate them inverted
      // to known bits in V shifted to the right by C.
      KnownZero |= RHSKnownOne << C->getZExtValue();
@@ -710,9 +707,9 @@
     // assume(v >=_s c) where c is non-negative
     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
              Pred == ICmpInst::ICMP_SGE &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
 
      if (RHSKnownZero.isNegative()) {
        // We know that the sign bit is zero.
@@ -721,9 +718,9 @@
     // assume(v >_s c) where c is at least -1.
     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
              Pred == ICmpInst::ICMP_SGT &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
 
      if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) {
        // We know that the sign bit is zero.
@@ -732,9 +729,9 @@
     // assume(v <=_s c) where c is negative
     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
              Pred == ICmpInst::ICMP_SLE &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
 
      if (RHSKnownOne.isNegative()) {
        // We know that the sign bit is one.
@@ -743,9 +740,9 @@
     // assume(v <_s c) where c is non-positive
     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
              Pred == ICmpInst::ICMP_SLT &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
 
      if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) {
        // We know that the sign bit is one.
@@ -754,9 +751,9 @@
     // assume(v <=_u c)
     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
              Pred == ICmpInst::ICMP_ULE &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
 
      // Whatever high bits in c are zero are known to be zero.
      KnownZero |=
@@ -764,13 +761,13 @@
     // assume(v <_u c)
     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
              Pred == ICmpInst::ICMP_ULT &&
-             isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+             isValidAssumeForContext(II, Q.CxtI, Q.DT)) {
      APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+     computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II));
 
      // Whatever high bits in c are zero are known to be zero (if c is a power
      // of 2, then one more).
-     if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
+     if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, II)))
        KnownZero |=
          APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1);
      else
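The InstCombine change below is the producer side of the patterns consumed by computeKnownBitsFromAssume above. As a concrete example of its effect (a sketch mirroring the @bar1 test updated later in this patch), it rewrites

  %and = and i32 %a, 7
  %cmp = icmp eq i32 %and, 1
  call void @llvm.assume(i1 %cmp)

into

  call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ]

recording both compared values and, via AddAffectedFromEq, the operands of the and.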
Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2518,6 +2518,78 @@
     if (KnownOne.isAllOnesValue())
       return eraseInstFromFunction(*II);
 
+    // For assumptions, add to the associated operand bundle the values to
+    // which the assumption might apply.
+    // Note: This code must be kept in-sync with the code in
+    // computeKnownBitsFromAssume in ValueTracking.
+    SmallVector<Value *, 16> Affected;
+    auto AddAffected = [&Affected](Value *V) {
+      if (isa<Argument>(V)) {
+        Affected.push_back(V);
+      } else if (auto *I = dyn_cast<Instruction>(V)) {
+        Affected.push_back(I);
+
+        if (I->getOpcode() == Instruction::BitCast ||
+            I->getOpcode() == Instruction::PtrToInt) {
+          V = I->getOperand(0);
+          if (isa<Instruction>(V) || isa<Argument>(V))
+            Affected.push_back(V);
+        }
+      }
+    };
+
+    CmpInst::Predicate Pred;
+    if (match(IIOperand, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
+      AddAffected(A);
+      AddAffected(B);
+
+      if (Pred == ICmpInst::ICMP_EQ) {
+        // For equality comparisons, we handle the case of bit inversion.
+        auto AddAffectedFromEq = [&AddAffected](Value *V) {
+          Value *A;
+          if (match(V, m_Not(m_Value(A)))) {
+            AddAffected(A);
+            V = A;
+          }
+
+          Value *B;
+          ConstantInt *C;
+          if (match(V,
+                    m_CombineOr(m_And(m_Value(A), m_Value(B)),
+                      m_CombineOr(m_Or(m_Value(A), m_Value(B)),
+                                  m_Xor(m_Value(A), m_Value(B)))))) {
+            AddAffected(A);
+            AddAffected(B);
+          } else if (match(V,
+                           m_CombineOr(m_Shl(m_Value(A), m_ConstantInt(C)),
+                             m_CombineOr(m_LShr(m_Value(A), m_ConstantInt(C)),
+                                         m_AShr(m_Value(A),
+                                                m_ConstantInt(C)))))) {
+            AddAffected(A);
+          }
+        };
+
+        AddAffectedFromEq(A);
+        AddAffectedFromEq(B);
+      }
+    }
+
+    // If the list of affected values is the same as the existing list then
+    // there's nothing more to do here.
+    if (!Affected.empty())
+      if (auto OB = CI.getOperandBundle("affected"))
+        if (Affected.size() == OB.getValue().Inputs.size() &&
+            std::equal(Affected.begin(), Affected.end(),
+                       OB.getValue().Inputs.begin()))
+          Affected.clear();
+
+    if (!Affected.empty()) {
+      Builder->CreateCall(AssumeIntrinsic, IIOperand,
+                          OperandBundleDef("affected", Affected),
+                          II->getName());
+      return eraseInstFromFunction(*II);
+    }
+
     break;
   }
   case Intrinsic::experimental_gc_relocate: {
Index: llvm/trunk/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ llvm/trunk/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -425,9 +425,12 @@
   NewSrcAlignments.clear();
 
   bool Changed = false;
-  for (auto &AssumeVH : AC.assumptions())
-    if (AssumeVH)
-      Changed |= processAssumption(cast<CallInst>(AssumeVH));
+
+  for (auto &B : F)
+    for (auto &I : B)
+      if (auto *II = dyn_cast<IntrinsicInst>(&I))
+        if (II->getIntrinsicID() == Intrinsic::assume)
+          Changed |= processAssumption(II);
 
   return Changed;
 }
Index: llvm/trunk/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll
===================================================================
--- llvm/trunk/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll
+++ llvm/trunk/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll
@@ -55,7 +55,7 @@
   %cmp = icmp slt i32 %iv, 10000
 ; CHECK: %iv.sext = sext i32 %iv to i64
 ; CHECK-NEXT: --> {0,+,3}<%loop>
-  call void @llvm.assume(i1 %cmp)
+  call void @llvm.assume(i1 %cmp) [ "affected"(i32 %iv) ]
   %c = load volatile i1, i1* %cond
   br i1 %c, label %loop, label %leave
 
@@ -159,7 +159,7 @@
   %cmp = icmp ugt i32 %iv.inc, -10000
 ; CHECK: %iv.zext = zext i32 %iv to i64
 ; CHECK-NEXT: --> {30000,+,-2}<%loop>
-  call void @llvm.assume(i1 %cmp)
+  call void @llvm.assume(i1 %cmp) [ "affected"(i32 %iv.inc) ]
   %c = load volatile i1, i1* %cond
   br i1 %c, label %loop, label %leave
 
Index: llvm/trunk/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
===================================================================
--- llvm/trunk/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
+++ llvm/trunk/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
@@ -11,7 +11,7 @@
 entry:
   %n = and i32 %no, 4294967294
   %0 = icmp sgt i32 %n, 0                         ; <i1> [#uses=1]
-  tail call void @llvm.assume(i1 %0)
+  tail call void @llvm.assume(i1 %0) [ "affected"(i32 %n) ]
   br label %bb.nph
 
 bb.nph:                                           ; preds = %entry
Index: llvm/trunk/test/Transforms/CorrelatedValuePropagation/conflict.ll
===================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/conflict.ll
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/conflict.ll
@@ -26,7 +26,7 @@
 define i8 @test2(i8 %a) {
 ; CHECK-LABEL: @test2
   %cmp1 = icmp eq i8 %a, 5
-  call void @llvm.assume(i1 %cmp1)
+  call void @llvm.assume(i1 %cmp1) [ "affected"(i8 %a) ]
   %cmp2 = icmp eq i8 %a, 3
 ; CHECK: br i1 false, label %dead, label %exit
   br i1 %cmp2, label %dead, label %exit
@@ -43,7 +43,7 @@
 dead:
   %cmp2 = icmp eq i8 %a, 3
 ; CHECK: call void @llvm.assume(i1 false)
-  call void @llvm.assume(i1 %cmp2)
+  call void @llvm.assume(i1 %cmp2) [ "affected"(i8 %a) ]
   ret i8 %a
 exit:
   ret i8 0
Index: llvm/trunk/test/Transforms/InstCombine/assume-redundant.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/assume-redundant.ll
+++ llvm/trunk/test/Transforms/InstCombine/assume-redundant.ll
@@ -11,7 +11,7 @@
 
 define void @_Z3fooR1s(%struct.s* nocapture readonly dereferenceable(8) %x) #0 {
 
 ; CHECK-LABEL: @_Z3fooR1s
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %maskcond) [ "affected"(i64 %maskedptr, i64 %ptrint, double* %{{.*}}) ]
 ; CHECK-NOT: call void @llvm.assume
 
 entry:
Index: llvm/trunk/test/Transforms/InstCombine/assume.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/assume.ll
+++ llvm/trunk/test/Transforms/InstCombine/assume.ll
@@ -11,7 +11,7 @@
 ; been removed:
 ; CHECK-LABEL: @foo1
 ; CHECK-DAG: load i32, i32* %a, align 32
-; CHECK-DAG: call void @llvm.assume
+; CHECK-DAG: call void @llvm.assume(i1 %maskcond) [ "affected"(i64 %maskedptr, i64 %ptrint, i32* %a) ]
 ; CHECK: ret i32
 
   %ptrint = ptrtoint i32* %a to i64
@@ -28,7 +28,7 @@
 ; Same check as in @foo1, but make sure it works if the assume is first too.
 ; CHECK-LABEL: @foo2
 ; CHECK-DAG: load i32, i32* %a, align 32
-; CHECK-DAG: call void @llvm.assume
+; CHECK-DAG: call void @llvm.assume(i1 %maskcond) [ "affected"(i64 %maskedptr, i64 %ptrint, i32* %a) ]
 ; CHECK: ret i32
 
   %ptrint = ptrtoint i32* %a to i64
@@ -51,7 +51,7 @@
 ; CHECK: ret i32 4
 
   %cmp = icmp eq i32 %a, 4
-  tail call void @llvm.assume(i1 %cmp)
+  tail call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
   ret i32 %a
 }
 
@@ -93,7 +93,7 @@
   %and1 = and i32 %a, 3
 
 ; CHECK-LABEL: @bar1
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ]
 ; CHECK: ret i32 1
 
   %and = and i32 %a, 7
@@ -107,7 +107,7 @@
 define i32 @bar2(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @bar2
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ]
 ; CHECK: ret i32 1
 
   %and = and i32 %a, 7
@@ -125,7 +125,7 @@
 
 ; Don't be fooled by other assumes around.
 ; CHECK-LABEL: @bar3
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ]
 ; CHECK: ret i32 1
 
   tail call void @llvm.assume(i1 %x)
@@ -145,8 +145,8 @@
   %and1 = and i32 %b, 3
 
 ; CHECK-LABEL: @bar4
-; CHECK: call void @llvm.assume
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ]
+; CHECK: call void @llvm.assume(i1 %cmp2) [ "affected"(i32 %a, i32 %b) ]
 ; CHECK: ret i32 1
 
   %and = and i32 %a, 7
@@ -167,7 +167,7 @@
   ret i32 %conv
 
 ; CHECK-LABEL: @icmp1
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
 ; CHECK: ret i32 1
 }
 
@@ -182,7 +182,7 @@
   ret i32 %lnot.ext
 
 ; CHECK-LABEL: @icmp2
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
 ; CHECK: ret i32 0
 }
 
@@ -217,7 +217,7 @@
 
 ; CHECK-LABEL: @nonnull2
 ; CHECK-NOT: !nonnull
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %load) ]
 }
 
 ; Make sure the above canonicalization does not trigger
@@ -236,7 +236,7 @@
 
 ; CHECK-LABEL: @nonnull3
 ; CHECK-NOT: !nonnull
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32* %load) ]
 }
 
 ; Make sure the above canonicalization does not trigger
@@ -254,7 +254,7 @@
 
 ; CHECK-LABEL: @nonnull4
 ; CHECK-NOT: !nonnull
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32* %load) ]
 }
 
Index: llvm/trunk/test/Transforms/InstCombine/assume2.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/assume2.ll
+++ llvm/trunk/test/Transforms/InstCombine/assume2.ll
@@ -9,7 +9,7 @@
 define i32 @test1(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test1
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ]
 ; CHECK: ret i32 5
 
   %and = and i32 %a, 15
@@ -24,7 +24,7 @@
 define i32 @test2(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test2
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a.not, i32 %a) ]
 ; CHECK: ret i32 2
 
   %and = and i32 %a, 15
@@ -40,7 +40,7 @@
 define i32 @test3(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test3
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %v, i32 %a) ]
 ; CHECK: ret i32 5
 
   %v = or i32 %a, 4294967280
@@ -55,7 +55,7 @@
 define i32 @test4(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test4
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a.not, i32 %a) ]
 ; CHECK: ret i32 2
 
   %v = or i32 %a, 4294967280
@@ -71,7 +71,7 @@
 define i32 @test5(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test5
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
 ; CHECK: ret i32 4
 
   %v = xor i32 %a, 1
@@ -86,7 +86,7 @@
 define i32 @test6(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test6
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %v.mask, i32 %a) ]
 ; CHECK: ret i32 5
 
   %v = shl i32 %a, 2
@@ -101,7 +101,7 @@
 define i32 @test7(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test7
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %v.mask, i32 %a) ]
 ; CHECK: ret i32 20
 
   %v = lshr i32 %a, 2
@@ -116,7 +116,7 @@
 define i32 @test8(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test8
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %v.mask, i32 %a) ]
 ; CHECK: ret i32 20
 
   %v = lshr i32 %a, 2
@@ -131,7 +131,7 @@
 define i32 @test9(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test9
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
 ; CHECK: ret i32 0
 
   %cmp = icmp sgt i32 %a, 5
@@ -145,7 +145,7 @@
 define i32 @test10(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test10
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
 ; CHECK: ret i32 -2147483648
 
   %cmp = icmp sle i32 %a, -2
@@ -159,7 +159,7 @@
 define i32 @test11(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test11
-; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
 ; CHECK: ret i32 0
 
   %cmp = icmp ule i32 %a, 256
Index: llvm/trunk/test/Transforms/InstSimplify/add-mask.ll
===================================================================
--- llvm/trunk/test/Transforms/InstSimplify/add-mask.ll
+++ llvm/trunk/test/Transforms/InstSimplify/add-mask.ll
@@ -46,7 +46,7 @@
   %b = load i32, i32* @B
   %b.and = and i32 %b, 1
   %b.cnd = icmp eq i32 %b.and, 1
-  call void @llvm.assume(i1 %b.cnd)
+  call void @llvm.assume(i1 %b.cnd) [ "affected"(i32 %b.and, i32 %b) ]
 
   %rhs = add i32 %a, %b
   %and = and i32 %a, %rhs
Index: llvm/trunk/test/Transforms/JumpThreading/assume-edge-dom.ll
===================================================================
--- llvm/trunk/test/Transforms/JumpThreading/assume-edge-dom.ll
+++ llvm/trunk/test/Transforms/JumpThreading/assume-edge-dom.ll
@@ -14,12 +14,12 @@
 taken:
     %res1 = call i8* @escape()
     %a = icmp eq i8* %res1, null
-    tail call void @llvm.assume(i1 %a)
+    tail call void @llvm.assume(i1 %a) [ "affected"(i8* %res1) ]
     br label %done
 not_taken:
     %res2 = call i8* @escape()
     %b = icmp ne i8* %res2, null
-    tail call void @llvm.assume(i1 %b)
+    tail call void @llvm.assume(i1 %b) [ "affected"(i8* %res2) ]
     br label %done
 
 ; An assume that can be used to simplify this comparison dominates each
Index: llvm/trunk/test/Transforms/JumpThreading/assume.ll
===================================================================
--- llvm/trunk/test/Transforms/JumpThreading/assume.ll
+++ llvm/trunk/test/Transforms/JumpThreading/assume.ll
@@ -6,7 +6,7 @@
 define i32 @test1(i32 %a, i32 %b) #0 {
 entry:
   %cmp = icmp sgt i32 %a, 5
-  tail call void @llvm.assume(i1 %cmp)
+  tail call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
   %cmp1 = icmp sgt i32 %b, 1234
   br i1 %cmp1, label %if.then, label %if.else
 
@@ -36,7 +36,7 @@
 define i32 @test2(i32 %a) #0 {
 entry:
   %cmp = icmp sgt i32 %a, 5
-  tail call void @llvm.assume(i1 %cmp)
+  tail call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
   %cmp1 = icmp sgt i32 %a, 3
   br i1 %cmp1, label %if.then, label %return
 
Index: llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
===================================================================
--- llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
+++ llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
@@ -75,10 +75,10 @@
 ; CHECK-LABEL: @reassociate_gep_assume(
   ; assume(j >= 0)
   %cmp = icmp sgt i32 %j, -1
-  call void @llvm.assume(i1 %cmp)
+  call void @llvm.assume(i1 %cmp) [ "affected"(i32 %j) ]
   %1 = add i32 %i, %j
   %cmp2 = icmp sgt i32 %1, -1
-  call void @llvm.assume(i1 %cmp2)
+  call void @llvm.assume(i1 %cmp2) [ "affected"(i32 %1) ]
 
   %idxprom.j = zext i32 %j to i64
   %2 = getelementptr float, float* %a, i64 %idxprom.j
Index: llvm/trunk/test/Transforms/SimplifyCFG/switch-dead-default.ll
===================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ llvm/trunk/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -91,7 +91,7 @@
 ; CHECK-LABEL: @test5
 ; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
   %cmp = icmp ult i8 %a, 2
-  call void @llvm.assume(i1 %cmp)
+  call void @llvm.assume(i1 %cmp) [ "affected"(i8 %a) ]
   switch i8 %a, label %default [i8 1, label %true
                                 i8 0, label %false]
 true:
@@ -112,7 +112,7 @@
 ; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
   %and = and i8 %a, 254
   %cmp = icmp eq i8 %and, 254
-  call void @llvm.assume(i1 %cmp)
+  call void @llvm.assume(i1 %cmp) [ "affected"(i8 %and, i8 %a) ]
   switch i8 %a, label %default [i8 255, label %true
                                 i8 254, label %false]
 true:
@@ -134,7 +134,7 @@
 ; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
   %and = and i8 %a, 254
   %cmp = icmp eq i8 %and, 254
-  call void @llvm.assume(i1 %cmp)
+  call void @llvm.assume(i1 %cmp) [ "affected"(i8 %and, i8 %a) ]
   switch i8 %a, label %default [i8 255, label %true
                                 i8 254, label %false
                                 i8 0, label %also_dead]
@@ -162,7 +162,7 @@
 ; CHECK: switch i8
   %and = and i8 %a, 254
   %cmp = icmp eq i8 %and, undef
-  call void @llvm.assume(i1 %cmp)
+  call void @llvm.assume(i1 %cmp) [ "affected"(i8 %and, i8 %a) ]
   switch i8 %a, label %default [i8 255, label %true
                                 i8 254, label %false]
 true:
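For completeness, a sketch (not part of the patch) of reading the bundle back through the existing operand-bundle API that the InstCombineCalls.cpp hunk uses. In-tree consumers instead walk a value's users, so this is purely illustrative:

  #include <cassert>
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Print the values an @llvm.assume declares knowledge about.
  static void printAffected(const IntrinsicInst *Assume) {
    assert(Assume->getIntrinsicID() == Intrinsic::assume &&
           "expected an assume intrinsic");
    if (auto OB = Assume->getOperandBundle("affected"))
      for (const Use &U : OB->Inputs)
        errs() << *U.get() << "\n";
  }

An assume carrying no "affected" bundle simply yields no entries, which matches the LangRef statement that the optimizer, not the frontend, is responsible for attaching these bundles.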