diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -113,10 +113,16 @@ /// number of steps. bool CheckWiden; + /// Known upper bound for the merge result. When widening, this range is + /// used instead of overdefined if it contains the merge result. A full set + /// (the default) means no bound is provided. + ConstantRange WidenCR; + MergeOptions() : MergeOptions(false, false) {} MergeOptions(bool MayIncludeUndef, bool CheckWiden) - : MayIncludeUndef(MayIncludeUndef), CheckWiden(CheckWiden) {} + : MayIncludeUndef(MayIncludeUndef), CheckWiden(CheckWiden), + WidenCR(1, true) {} MergeOptions &setMayIncludeUndef(bool V = true) { MayIncludeUndef = V; @@ -127,6 +133,11 @@ CheckWiden = V; return *this; } + + MergeOptions &setWidenCR(ConstantRange CR) { + WidenCR = std::move(CR); + return *this; + } }; // ConstVal and Range are initialized on-demand. @@ -341,10 +352,21 @@ if (getConstantRange() == NewR) return Tag != OldTag; - // Simple form of widening. If a range is extended multiple times, go to - // overdefined. - if (Opts.CheckWiden && ++NumRangeExtensions == 1) + // Simple form of widening. If a range is extended multiple times, use the + // provided WidenCR upper bound if it contains NewR; otherwise go to + // overdefined. + if (Opts.CheckWiden && ++NumRangeExtensions >= 1) { + // Make sure WidenCR is only used if it contains the merge result. 
+ if (!Opts.WidenCR.isFullSet() && Opts.WidenCR.contains(NewR)) { + assert(NewR.contains(getConstantRange()) && + "Existing range must be a subset of NewR"); + bool Changed = Opts.WidenCR != getConstantRange(); + Range = std::move(Opts.WidenCR); + NumRangeExtensions = 1; + return Changed || Tag != OldTag; + } return markOverdefined(); + } assert(NewR.contains(getConstantRange()) && "Existing range must be a subset of NewR"); diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -1270,12 +1270,12 @@ ValueLatticeElement &IV = ValueState[&CB]; ValueLatticeElement OriginalVal = getValueState(CopyOf); if (CondVal.isConstantRange() || OriginalVal.isConstantRange()) { - auto NewCR = + auto ImposedCR = ConstantRange::getFull(DL.getTypeSizeInBits(CopyOf->getType())); // Get the range imposed by the condition. if (CondVal.isConstantRange()) - NewCR = ConstantRange::makeAllowedICmpRegion( + ImposedCR = ConstantRange::makeAllowedICmpRegion( Pred, CondVal.getConstantRange()); // Combine range info for the original value with the new range from the @@ -1284,7 +1284,7 @@ ? OriginalVal.getConstantRange() : ConstantRange::getFull( DL.getTypeSizeInBits(CopyOf->getType())); - NewCR = NewCR.intersectWith(OriginalCR); + auto NewCR = ImposedCR.intersectWith(OriginalCR); addAdditionalUser(CmpOp1, &CB); // TODO: Actually filp MayIncludeUndef for the created range to false, @@ -1298,7 +1298,9 @@ // inferred, but the branch will get folded accordingly anyways. 
mergeInValue( IV, &CB, - ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef=*/true)); + ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef=*/true), + ValueLatticeElement::MergeOptions().setCheckWiden().setWidenCR( + ImposedCR)); return; } else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) { // For non-integer values or integer constant expressions, only diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll --- a/llvm/test/Transforms/SCCP/widening.ll +++ b/llvm/test/Transforms/SCCP/widening.ll @@ -357,8 +357,7 @@ ; IPSCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 2 ; IPSCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] ; IPSCCP: loop.body: -; IPSCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 2 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: call void @use(i1 true) ; IPSCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; IPSCCP-NEXT: br label [[LOOP_HEADER]] ; IPSCCP: exit: @@ -406,8 +405,7 @@ ; IPSCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 200 ; IPSCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] ; IPSCCP: loop.body: -; IPSCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 200 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: call void @use(i1 true) ; IPSCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; IPSCCP-NEXT: br label [[LOOP_HEADER]] ; IPSCCP: exit: