diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -97,10 +97,16 @@ /// number of steps. bool CheckWiden; + /// Provides a known upper bound for the merge result. Use this range + /// instead of going to overdefined for widening, if WidenCR contains the + /// merge result. + ConstantRange WidenCR; + MergeOptions() : MergeOptions(false, false) {} MergeOptions(bool MayIncludeUndef, bool CheckWiden) - : MayIncludeUndef(MayIncludeUndef), CheckWiden(CheckWiden) {} + : MayIncludeUndef(MayIncludeUndef), CheckWiden(CheckWiden), + WidenCR(1, true) {} MergeOptions &setMayIncludeUndef(bool V = true) { MayIncludeUndef = V; @@ -111,6 +117,11 @@ CheckWiden = V; return *this; } + + MergeOptions &setWidenCR(ConstantRange CR) { + WidenCR = CR; + return *this; + } }; // Const and Range are initialized on-demand. @@ -336,10 +347,21 @@ if (getConstantRange() == NewR) return Tag != OldTag; - // Simple form of widening. If a range is extended multiple times, go to - // overdefined. - if (Opts.CheckWiden && ++NumRangeExtensions == 1) + // Simple form of widening. If a range is extended multiple times, use the + // provided WidenCR upper bound if it contains NewR or overdefined + // otherwise. + if (Opts.CheckWiden && ++NumRangeExtensions >= 1) { + // Make sure WidenCR is only used if it contains the merge result. + if (!Opts.WidenCR.isFullSet() && + (NewR == Opts.WidenCR || Opts.WidenCR.contains(NewR))) { + assert(NewR.contains(getConstantRange())); + bool Changed = Opts.WidenCR != getConstantRange(); + Range = std::move(Opts.WidenCR); + NumRangeExtensions = 1; + return Changed || Tag != OldTag; + } return markOverdefined(); + } assert(NewR.contains(getConstantRange()) && "Existing range must be a subset of NewR"); diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -1274,12 +1274,12 @@ ValueLatticeElement &IV = ValueState[I]; ValueLatticeElement OriginalVal = getValueState(CopyOf); if (CondVal.isConstantRange() || OriginalVal.isConstantRange()) { - auto NewCR = + auto ImposedCR = ConstantRange::getFull(DL.getTypeSizeInBits(CopyOf->getType())); // Get the range imposed by the condition. if (CondVal.isConstantRange()) - NewCR = ConstantRange::makeAllowedICmpRegion( + ImposedCR = ConstantRange::makeAllowedICmpRegion( Pred, CondVal.getConstantRange()); // Combine range info for the original value with the new range from the @@ -1288,7 +1288,7 @@ ? OriginalVal.getConstantRange() : ConstantRange::getFull( DL.getTypeSizeInBits(CopyOf->getType())); - NewCR = NewCR.intersectWith(OriginalCR); + auto NewCR = ImposedCR.intersectWith(OriginalCR); addAdditionalUser(CmpOp1, I); // TODO: Actually filp MayIncludeUndef for the created range to false, @@ -1302,7 +1302,9 @@ // inferred, but the branch will get folded accordingly anyways. mergeInValue( IV, I, - ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef=*/true)); + ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef=*/true), + ValueLatticeElement::MergeOptions().setCheckWiden().setWidenCR( + ImposedCR)); return; } else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) { // For non-integer values or integer constant expressions, only diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll --- a/llvm/test/Transforms/SCCP/widening.ll +++ b/llvm/test/Transforms/SCCP/widening.ll @@ -369,8 +369,7 @@ ; IPSCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 2 ; IPSCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] ; IPSCCP: loop.body: -; IPSCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 2 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: call void @use(i1 true) ; IPSCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; IPSCCP-NEXT: br label [[LOOP_HEADER]] ; IPSCCP: exit: @@ -418,8 +417,7 @@ ; IPSCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 200 ; IPSCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] ; IPSCCP: loop.body: -; IPSCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 200 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: call void @use(i1 true) ; IPSCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; IPSCCP-NEXT: br label [[LOOP_HEADER]] ; IPSCCP: exit: @@ -741,36 +739,12 @@ ; IPSCCP-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[TMP32]], i64 1 ; IPSCCP-NEXT: br label [[BB66:%.*]] ; IPSCCP: bb37: -; IPSCCP-NEXT: [[C_2:%.*]] = icmp eq i32 [[TMP11]], 8 -; IPSCCP-NEXT: br i1 [[C_2]], label [[BB39:%.*]], label [[BB58:%.*]] -; IPSCCP: bb39: -; IPSCCP-NEXT: [[TMP40:%.*]] = add nsw i32 [[TMP11]], -1 -; IPSCCP-NEXT: [[TMP41:%.*]] = trunc i32 [[TMP3]] to i16 -; IPSCCP-NEXT: store i16 [[TMP41]], i16* bitcast ([4 x i8]* @global.11 to i16*), align 1 -; IPSCCP-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_BLAM_2]], %struct.blam.2* [[ARG]], i32 0, i32 0 -; IPSCCP-NEXT: [[TMP43:%.*]] = add i32 [[TMP7]], [[TMP40]] -; IPSCCP-NEXT: [[TMP44:%.*]] = mul i32 [[TMP43]], 4 -; IPSCCP-NEXT: [[TMP45:%.*]] = add i32 [[TMP44]], 2 -; IPSCCP-NEXT: [[TMP46:%.*]] = call dereferenceable(1) i8* @spam(%struct.baz.1* [[TMP42]], i32 [[TMP45]]) -; IPSCCP-NEXT: [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1 -; IPSCCP-NEXT: [[TMP48:%.*]] = zext i8 [[TMP47]] to i32 -; IPSCCP-NEXT: [[TMP49:%.*]] = sub i32 [[TMP43]], 1 -; IPSCCP-NEXT: [[TMP50:%.*]] = mul i32 [[TMP49]], 4 -; IPSCCP-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], 2 -; IPSCCP-NEXT: [[TMP52:%.*]] = call dereferenceable(1) i8* @spam(%struct.baz.1* [[TMP42]], i32 [[TMP51]]) -; IPSCCP-NEXT: [[TMP53:%.*]] = load i8, i8* [[TMP52]], align 1 -; IPSCCP-NEXT: [[TMP54:%.*]] = zext i8 [[TMP53]] to i32 -; IPSCCP-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP48]], [[TMP54]] -; IPSCCP-NEXT: br i1 [[TMP55]], label [[BB56:%.*]], label [[BB60:%.*]] -; IPSCCP: bb56: -; IPSCCP-NEXT: [[TMP57:%.*]] = add nsw i32 [[TMP40]], -1 -; IPSCCP-NEXT: br label [[BB60]] +; IPSCCP-NEXT: br label [[BB58:%.*]] ; IPSCCP: bb58: ; IPSCCP-NEXT: [[TMP59:%.*]] = bitcast i16* [[TMP33]] to i8* ; IPSCCP-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr inbounds ([4 x i8], [4 x i8]* @global.11, i64 0, i64 0), i8* align 2 [[TMP59]], i64 4, i1 false) -; IPSCCP-NEXT: br label [[BB60]] +; IPSCCP-NEXT: br label [[BB60:%.*]] ; IPSCCP: bb60: -; IPSCCP-NEXT: [[TMP61:%.*]] = phi i32 [ [[TMP57]], [[BB56]] ], [ [[TMP40]], [[BB39]] ], [ [[TMP11]], [[BB58]] ] ; IPSCCP-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_BLAM_2]], %struct.blam.2* [[ARG]], i32 0, i32 0 ; IPSCCP-NEXT: [[TMP63:%.*]] = add i32 [[TMP7]], 1 ; IPSCCP-NEXT: [[TMP64:%.*]] = mul i32 [[TMP63]], 4