Index: llvm/lib/Transforms/Scalar/GuardWidening.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -51,6 +51,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" @@ -124,9 +125,11 @@ /// the one described at https://github.com/llvm/llvm-project/issues/60234. The /// safest way to do it is to expand the new condition at WC's block. static Instruction *findInsertionPointForWideCondition(Instruction *Guard) { + if (isGuard(Guard)) + return Guard; if (auto WC = extractWidenableCondition(Guard)) return cast(WC); - return Guard; + return nullptr; } class GuardWideningImpl { @@ -181,7 +184,9 @@ /// Compute the score for widening the condition in \p DominatedInstr /// into \p DominatingGuard. WideningScore computeWideningScore(Instruction *DominatedInstr, - Instruction *DominatingGuard); + Instruction *WideningPoint, + SmallVectorImpl &ChecksToHoist, + SmallVectorImpl &ChecksToWiden); /// Helper to check if \p V can be hoisted to \p InsertPos. bool canBeHoistedTo(const Value *V, const Instruction *InsertPos) const { @@ -192,18 +197,32 @@ bool canBeHoistedTo(const Value *V, const Instruction *InsertPos, SmallPtrSetImpl &Visited) const; + bool canBeHoistedTo(const SmallVectorImpl &Checks, + const Instruction *InsertPos) const { + return all_of(Checks, + [&](const Value *V) { return canBeHoistedTo(V, InsertPos); }); + } /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c /// canBeHoistedTo returned true. void makeAvailableAt(Value *V, Instruction *InsertPos) const; + void makeAvailableAt(const SmallVectorImpl &Checks, + Instruction *InsertPos) const { + for_each(Checks, [&](Value *V) { makeAvailableAt(V, InsertPos); }); + } + /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try /// to generate an expression computing the logical AND of \p Cond0 and Cond1. /// Return true if the expression computing the AND is only as /// expensive as computing one of the two. If \p InsertPt is true then /// actually generate the resulting expression, make it available at \p /// InsertPt and return it in \p Result (else no change to the IR is made). - bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt, - Value *&Result); + bool mergeChecks(SmallVectorImpl &ChecksToHoist, + SmallVectorImpl &ChecksToWiden, + Instruction *InsertPt, Value *&Result); + + Value *hoistChecks(SmallVectorImpl &ChecksToHoist, + Instruction *InsertPt, Value *OldCondition); /// Adds freeze to Orig and push it as far as possible very aggressively. /// Also replaces all uses of frozen instruction with frozen version. @@ -251,13 +270,16 @@ /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and /// append them to \p Checks. Returns true on success, may clobber \c Checks /// on failure. - bool parseRangeChecks(Value *CheckCond, SmallVectorImpl &Checks) { - SmallPtrSet Visited; - return parseRangeChecks(CheckCond, Checks, Visited); + bool parseRangeChecks(SmallVectorImpl &ToParse, + SmallVectorImpl &Checks) { + for (auto CheckCond : ToParse) { + if (!parseRangeChecks(CheckCond, Checks)) + return false; + } + return true; } - bool parseRangeChecks(Value *CheckCond, SmallVectorImpl &Checks, - SmallPtrSetImpl &Visited); + bool parseRangeChecks(Value *CheckCond, SmallVectorImpl &Checks); /// Combine the checks in \p Checks into a smaller set of checks and append /// them into \p CombinedChecks. Return true on success (i.e. all of checks @@ -268,16 +290,21 @@ /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of /// computing only one of the two expressions? - bool isWideningCondProfitable(Value *Cond0, Value *Cond1) { + bool isWideningCondProfitable(SmallVectorImpl &ChecksToHoist, + SmallVectorImpl &ChecksToWiden) { Value *ResultUnused; - return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused); + return mergeChecks(ChecksToHoist, ChecksToWiden, + /*InsertPt=*/nullptr, ResultUnused); } /// Widen \p ToWiden to fail if \p NewCondition is false - void widenGuard(Instruction *ToWiden, Value *NewCondition) { + void widenGuard(SmallVectorImpl &ChecksToHoist, + SmallVectorImpl &ChecksToWiden, + Instruction *ToWiden) { Value *Result; Instruction *InsertPt = findInsertionPointForWideCondition(ToWiden); - widenCondCommon(getCondition(ToWiden), NewCondition, InsertPt, Result); + if (!mergeChecks(ChecksToHoist, ChecksToWiden, InsertPt, Result)) + Result = hoistChecks(ChecksToHoist, InsertPt, getCondition(ToWiden)); if (isGuardAsWidenableBranch(ToWiden)) { setWidenableBranchCond(cast(ToWiden), Result); return; @@ -345,10 +372,13 @@ Instruction *Instr, const df_iterator &DFSI, const DenseMap> &GuardsInBlock) { + SmallVector ChecksToHoist; + parseWidenableGuard(Instr, ChecksToHoist); // Ignore trivial true or false conditions. These instructions will be // trivially eliminated by any cleanup pass. Do not erase them because other // guards can possibly be widened into them. - if (isa(getCondition(Instr))) + if (ChecksToHoist.empty() || + (ChecksToHoist.size() == 1 && isa(ChecksToHoist.front()))) return false; Instruction *BestSoFar = nullptr; @@ -384,10 +414,15 @@ assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?"); for (auto *Candidate : make_range(I, E)) { - auto Score = computeWideningScore(Instr, Candidate); - LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr) - << " and " << *getCondition(Candidate) << " is " - << scoreTypeToString(Score) << "\n"); + auto *WideningPoint = findInsertionPointForWideCondition(Candidate); + if (!WideningPoint) + continue; + SmallVector CandidateChecks; + parseWidenableGuard(Candidate, CandidateChecks); + auto Score = computeWideningScore(Instr, WideningPoint, ChecksToHoist, + CandidateChecks); + LLVM_DEBUG(dbgs() << "Score between " << *Instr << " and " << *Candidate + << " is " << scoreTypeToString(Score) << "\n"); if (Score > BestScoreSoFar) { BestScoreSoFar = Score; BestSoFar = Candidate; @@ -406,7 +441,9 @@ LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar << " with score " << scoreTypeToString(BestScoreSoFar) << "\n"); - widenGuard(BestSoFar, getCondition(Instr)); + SmallVector ChecksToWiden; + parseWidenableGuard(BestSoFar, ChecksToWiden); + widenGuard(ChecksToHoist, ChecksToWiden, BestSoFar); auto NewGuardCondition = ConstantInt::getTrue(Instr->getContext()); setCondition(Instr, NewGuardCondition); EliminatedGuardsAndBranches.push_back(Instr); @@ -414,11 +451,12 @@ return true; } -GuardWideningImpl::WideningScore -GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr, - Instruction *DominatingGuard) { +GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( + Instruction *DominatedInstr, Instruction *WideningPoint, + SmallVectorImpl &ChecksToHoist, + SmallVectorImpl &ChecksToWiden) { Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent()); - Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent()); + Loop *DominatingGuardLoop = LI.getLoopFor(WideningPoint->getParent()); bool HoistingOutOfLoop = false; if (DominatingGuardLoop != DominatedInstrLoop) { @@ -431,10 +469,9 @@ HoistingOutOfLoop = true; } - auto *WideningPoint = findInsertionPointForWideCondition(DominatingGuard); - if (!canBeHoistedTo(getCondition(DominatedInstr), WideningPoint)) + if (!canBeHoistedTo(ChecksToHoist, WideningPoint)) return WS_IllegalOrNegative; - if (!canBeHoistedTo(getCondition(DominatingGuard), WideningPoint)) + if (!canBeHoistedTo(ChecksToWiden, WideningPoint)) return WS_IllegalOrNegative; // If the guard was conditional executed, it may never be reached @@ -445,8 +482,7 @@ // here. TODO: evaluate cost model for spurious deopt // NOTE: As written, this also lets us hoist right over another guard which // is essentially just another spelling for control flow. - if (isWideningCondProfitable(getCondition(DominatedInstr), - getCondition(DominatingGuard))) + if (isWideningCondProfitable(ChecksToHoist, ChecksToWiden)) return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive; if (HoistingOutOfLoop) @@ -482,7 +518,7 @@ // control flow (guards, calls which throw, etc...). That choice appears // arbitrary (we assume that implicit control flow exits are all rare). auto MaybeHoistingToHotterBlock = [&]() { - const auto *DominatingBlock = DominatingGuard->getParent(); + const auto *DominatingBlock = WideningPoint->getParent(); const auto *DominatedBlock = DominatedInstr->getParent(); // Descend as low as we can, always taking the likely successor. @@ -508,7 +544,8 @@ if (!DT.dominates(DominatingBlock, DominatedBlock)) return true; // TODO: diamond, triangle cases - if (!PDT) return true; + if (!PDT) + return true; return !PDT->dominates(DominatedBlock, DominatingBlock); }; @@ -654,17 +691,20 @@ return Result; } -bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, - Instruction *InsertPt, Value *&Result) { +bool GuardWideningImpl::mergeChecks(SmallVectorImpl &ChecksToHoist, + SmallVectorImpl &ChecksToWiden, + Instruction *InsertPt, Value *&Result) { using namespace llvm::PatternMatch; - { // L >u C0 && L >u C1 -> L >u max(C0, C1) ConstantInt *RHS0, *RHS1; Value *LHS; ICmpInst::Predicate Pred0, Pred1; - if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) && - match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) { + if (ChecksToWiden.size() == 1 && ChecksToHoist.size() == 1 && + match(ChecksToWiden.front(), + m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) && + match(ChecksToHoist.front(), + m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) { ConstantRange CR0 = ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue()); @@ -681,7 +721,7 @@ if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) { if (InsertPt) { ConstantInt *NewRHS = - ConstantInt::get(Cond0->getContext(), NewRHSAP); + ConstantInt::get(InsertPt->getContext(), NewRHSAP); assert(canBeHoistedTo(LHS, InsertPt) && "must be"); makeAvailableAt(LHS, InsertPt); Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk"); @@ -694,7 +734,8 @@ { SmallVector Checks, CombinedChecks; - if (parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) && + if (parseRangeChecks(ChecksToWiden, Checks) && + parseRangeChecks(ChecksToHoist, Checks) && combineRangeChecks(Checks, CombinedChecks)) { if (InsertPt) { Result = nullptr; @@ -713,35 +754,28 @@ return true; } } - - // Base case -- just logical-and the two conditions together. - - if (InsertPt) { - makeAvailableAt(Cond0, InsertPt); - makeAvailableAt(Cond1, InsertPt); - Cond1 = freezeAndPush(Cond1, InsertPt); - Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt); - } - // We were not able to compute Cond0 AND Cond1 for the price of one. return false; } -bool GuardWideningImpl::parseRangeChecks( - Value *CheckCond, SmallVectorImpl &Checks, - SmallPtrSetImpl &Visited) { - if (!Visited.insert(CheckCond).second) - return true; +Value *GuardWideningImpl::hoistChecks(SmallVectorImpl &ChecksToHoist, + Instruction *InsertPt, + Value *OldCondition) { + assert(!ChecksToHoist.empty()); + IRBuilder<> Builder(InsertPt); + makeAvailableAt(ChecksToHoist, InsertPt); + makeAvailableAt(OldCondition, InsertPt); + Value *Result = Builder.CreateAnd(ChecksToHoist); + Result = freezeAndPush(Result, InsertPt); + Result = Builder.CreateAnd(OldCondition, Result); + Result->setName("wide.chk"); + return Result; +} +bool GuardWideningImpl::parseRangeChecks( + Value *CheckCond, SmallVectorImpl &Checks) { using namespace llvm::PatternMatch; - { - Value *AndLHS, *AndRHS; - if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS)))) - return parseRangeChecks(AndLHS, Checks) && - parseRangeChecks(AndRHS, Checks); - } - auto *IC = dyn_cast(CheckCond); if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() || (IC->getPredicate() != ICmpInst::ICMP_ULT && Index: llvm/test/Transforms/GuardWidening/range-check-merging.ll =================================================================== --- llvm/test/Transforms/GuardWidening/range-check-merging.ll +++ llvm/test/Transforms/GuardWidening/range-check-merging.ll @@ -292,23 +292,24 @@ ; CHECK-NEXT: [[CHK0_B:%.*]] = icmp ult i32 [[X_GW_FR]], [[LENGTH_B]] ; CHECK-NEXT: [[CHK0:%.*]] = and i1 [[CHK0_A]], [[CHK0_B]] ; CHECK-NEXT: [[X_INC1:%.*]] = add i32 [[X_GW_FR]], 1 -; CHECK-NEXT: [[CHK1_A:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH_A]] ; CHECK-NEXT: [[CHK1_B:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH_B]] -; CHECK-NEXT: [[CHK1:%.*]] = and i1 [[CHK1_A]], [[CHK1_B]] -; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] +; CHECK-NEXT: [[CHK1_A:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH_A]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[CHK1_B]], [[CHK1_A]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[TMP0]] ; CHECK-NEXT: [[X_INC2:%.*]] = add i32 [[X_GW_FR]], 2 ; CHECK-NEXT: [[CHK2_A:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH_A]] -; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[CHK2_A]], [[CHK0_A]] -; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[CHK0_B]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[CHK2_A]], [[CHK0_A]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[CHK0_B]], [[TMP1]] ; CHECK-NEXT: [[CHK2_B:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH_B]] -; CHECK-NEXT: [[WIDE_CHK1:%.*]] = and i1 [[CHK2_B]], [[TMP1]] +; CHECK-NEXT: [[WIDE_CHK1:%.*]] = and i1 [[CHK2_B]], [[TMP2]] ; CHECK-NEXT: [[X_INC3:%.*]] = add i32 [[X_GW_FR]], 3 -; CHECK-NEXT: [[CHK3_B:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH_B]] -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[CHK3_B]], [[CHK0_B]] -; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[CHK0_A]], [[TMP2]] ; CHECK-NEXT: [[CHK3_A:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH_A]] -; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[CHK3_A]], [[TMP3]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[CHK3_A]], [[CHK0_A]] +; CHECK-NEXT: [[TMP4:%.*]] = and i1 [[CHK0_B]], [[TMP3]] +; CHECK-NEXT: [[CHK3_B:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH_B]] +; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[CHK3_B]], [[TMP4]] ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK2]]) [ "deopt"() ] +; CHECK-NEXT: [[CHK1:%.*]] = and i1 [[CHK1_A]], [[CHK1_B]] ; CHECK-NEXT: [[CHK2:%.*]] = and i1 [[CHK2_A]], [[CHK2_B]] ; CHECK-NEXT: [[CHK3:%.*]] = and i1 [[CHK3_A]], [[CHK3_B]] ; CHECK-NEXT: ret void