diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -113,10 +113,13 @@ /// number of steps. bool CheckWiden; + unsigned MaxWidenSteps; MergeOptions() : MergeOptions(false, false) {} - MergeOptions(bool MayIncludeUndef, bool CheckWiden) - : MayIncludeUndef(MayIncludeUndef), CheckWiden(CheckWiden) {} + MergeOptions(bool MayIncludeUndef, bool CheckWiden, + unsigned MaxWidenSteps = 1) + : MayIncludeUndef(MayIncludeUndef), CheckWiden(CheckWiden), + MaxWidenSteps(MaxWidenSteps) {} MergeOptions &setMayIncludeUndef(bool V = true) { MayIncludeUndef = V; @@ -127,6 +130,11 @@ CheckWiden = V; return *this; } + MergeOptions &setMaxWidenSteps(unsigned Steps = 1) { + CheckWiden = true; + MaxWidenSteps = Steps; + return *this; + } }; // ConstVal and Range are initialized on-demand. @@ -349,7 +357,7 @@ // Simple form of widening. If a range is extended multiple times, go to // overdefined. - if (Opts.CheckWiden && ++NumRangeExtensions == 1) + if (Opts.CheckWiden && ++NumRangeExtensions == Opts.MaxWidenSteps) return markOverdefined(); assert(NewR.contains(getConstantRange()) && diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -403,7 +403,7 @@ bool mergeInValue(ValueLatticeElement &IV, Value *V, ValueLatticeElement MergeWithV, ValueLatticeElement::MergeOptions Opts = { - /*MayIncludeUndef=*/false, /*CheckWiden=*/true}) { + /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) { if (IV.mergeIn(MergeWithV, Opts)) { pushToWorkList(IV, V); LLVM_DEBUG(dbgs() << "Merged " << MergeWithV << " into " << *V << " : " @@ -415,7 +415,7 @@ bool mergeInValue(Value *V, ValueLatticeElement MergeWithV, ValueLatticeElement::MergeOptions Opts = { - /*MayIncludeUndef=*/false, /*CheckWiden=*/true}) { + /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) { assert(!V->getType()->isStructTy() && "non-structs should use markConstant"); return mergeInValue(ValueState[V], V, MergeWithV, Opts); @@ -733,13 +733,17 @@ // constant. If they are constant and don't agree, the PHI is overdefined. // If there are no executable operands, the PHI remains unknown. bool Changed = false; + unsigned NumActiveIncoming = 0; for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { ValueLatticeElement IV = getValueState(PN.getIncomingValue(i)); if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent())) continue; + NumActiveIncoming++; ValueLatticeElement &Res = getValueState(&PN); - Changed |= Res.mergeIn(IV); + Changed |= + Res.mergeIn(IV, ValueLatticeElement::MergeOptions().setMaxWidenSteps( + NumActiveIncoming + 1)); if (Res.isOverdefined()) break; } @@ -1115,7 +1119,7 @@ auto It = TrackedGlobals.find(GV); if (It != TrackedGlobals.end()) { mergeInValue(IV, &I, It->second, - ValueLatticeElement::MergeOptions().setCheckWiden(false)); + ValueLatticeElement::MergeOptions().setMaxWidenSteps(10)); return; } } @@ -1203,11 +1207,13 @@ if (auto *STy = dyn_cast(AI->getType())) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { ValueLatticeElement CallArg = getStructValueState(*CAI, i); - mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg); + mergeInValue( + getStructValueState(&*AI, i), &*AI, CallArg, + ValueLatticeElement::MergeOptions().setMaxWidenSteps(10)); } } else mergeInValue(&*AI, getValueState(*CAI), - ValueLatticeElement::MergeOptions().setCheckWiden(false)); + ValueLatticeElement::MergeOptions().setMaxWidenSteps(10)); } } } @@ -1325,7 +1331,9 @@ return handleCallOverdefined(CB); // Not tracking this callee. // If so, propagate the return value of the callee into this call result. - mergeInValue(&CB, TFRVI->second); + // TODO: Need a test case for widening limit here. + mergeInValue(&CB, TFRVI->second, + ValueLatticeElement::MergeOptions().setMaxWidenSteps(10)); } } diff --git a/llvm/test/Transforms/SCCP/constant-range-struct.ll b/llvm/test/Transforms/SCCP/constant-range-struct.ll --- a/llvm/test/Transforms/SCCP/constant-range-struct.ll +++ b/llvm/test/Transforms/SCCP/constant-range-struct.ll @@ -102,22 +102,14 @@ ; CHECK-NEXT: [[S:%.*]] = call { i64, i64 } @struct2() ; CHECK-NEXT: [[V1:%.*]] = extractvalue { i64, i64 } [[S]], 0 ; CHECK-NEXT: [[V2:%.*]] = extractvalue { i64, i64 } [[S]], 1 -; CHECK-NEXT: [[T_1:%.*]] = icmp ne i64 [[V1]], 10 -; CHECK-NEXT: call void @use(i1 [[T_1]]) -; CHECK-NEXT: [[T_2:%.*]] = icmp ult i64 [[V1]], 100 -; CHECK-NEXT: call void @use(i1 [[T_2]]) -; CHECK-NEXT: [[T_3:%.*]] = icmp ne i64 [[V2]], 0 -; CHECK-NEXT: call void @use(i1 [[T_3]]) -; CHECK-NEXT: [[T_4:%.*]] = icmp ult i64 [[V2]], 301 -; CHECK-NEXT: call void @use(i1 [[T_4]]) -; CHECK-NEXT: [[F_1:%.*]] = icmp eq i64 [[V1]], 10 -; CHECK-NEXT: call void @use(i1 [[F_1]]) -; CHECK-NEXT: [[F_2:%.*]] = icmp ult i64 [[V1]], 19 -; CHECK-NEXT: call void @use(i1 [[F_2]]) -; CHECK-NEXT: [[F_3:%.*]] = icmp eq i64 [[V2]], 50 -; CHECK-NEXT: call void @use(i1 [[F_3]]) -; CHECK-NEXT: [[F_4:%.*]] = icmp ugt i64 [[V2]], 301 -; CHECK-NEXT: call void @use(i1 [[F_4]]) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[C_1:%.*]] = icmp eq i64 [[V1]], 25 ; CHECK-NEXT: call void @use(i1 [[C_1]]) ; CHECK-NEXT: [[C_2:%.*]] = icmp ult i64 [[V1]], 25 diff --git a/llvm/test/Transforms/SCCP/ipsccp-cycles.ll b/llvm/test/Transforms/SCCP/ipsccp-cycles.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SCCP/ipsccp-cycles.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -ipsccp -S | FileCheck %s + +define internal i32 @test1a(i32 %A, i32 %b) { +; CHECK-LABEL: @test1a( +; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], [[B:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[BB_TRUE:%.*]], label [[BB_FALSE:%.*]] +; CHECK: bb.true: +; CHECK-NEXT: [[R:%.*]] = call i32 @test1a(i32 [[X]], i32 [[B]]) +; CHECK-NEXT: ret i32 [[R]] +; CHECK: bb.false: +; CHECK-NEXT: ret i32 [[A]] +; + %X = add i32 %A, 1 + %c = icmp eq i32 %X, %b + br i1 %c, label %bb.true, label %bb.false + +bb.true: + %r = call i32 @test1a(i32 %X, i32 %b) + ret i32 %r + +bb.false: + ret i32 %A +} + +define i32 @test1b(i32 %b) { +; CHECK-LABEL: @test1b( +; CHECK-NEXT: [[X:%.*]] = call i32 @test1a(i32 17, i32 [[B:%.*]]) +; CHECK-NEXT: ret i32 [[X]] +; + %X = call i32 @test1a( i32 17, i32 %b) + ret i32 %X +} + +@Getopt.optind = internal global i32 1, align 4 + +define i32 @test2(i32 %a) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* @Getopt.optind, align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* @Getopt.optind +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[ADD]], [[A:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + br label %loop + +loop: + %lv = load i32, i32* @Getopt.optind, align 4 + %add = add i32 %lv, 1 + store i32 %add, i32* @Getopt.optind + %c = icmp eq i32 %add, %a + br i1 %c, label %exit, label %loop + +exit: + ret i32 %add +} + + +define internal i32 @test3a(i32 %a) { +; CHECK-LABEL: @test3a( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RES:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[RES]], 1000 +; CHECK-NEXT: br i1 [[C]], label [[BB_TRUE:%.*]], label [[BB_FALSE:%.*]] +; CHECK: bb.true: +; CHECK-NEXT: ret i32 [[RES]] +; CHECK: bb.false: +; CHECK-NEXT: ret i32 0 +; +entry: + %res = add i32 %a, 1 + %c = icmp ult i32 %res, 1000 + br i1 %c, label %bb.true, label %bb.false + +bb.true: + ret i32 %res + +bb.false: + ret i32 0 +} + +define i32 @test3b(i32 %a) { +; CHECK-LABEL: @test3b( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V1:%.*]] = call i32 @test3a(i32 0) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[V2:%.*]] = call i32 @test3a(i32 [[V1]]) +; CHECK-NEXT: [[V3:%.*]] = add i32 [[V2]], 1 +; CHECK-NEXT: [[V4:%.*]] = call i32 @test3a(i32 [[V3]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[V4]], [[A:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i32 [[V4]] +; +entry: + %v1 = call i32 @test3a(i32 0) + br label %loop + +loop: + %v2 = call i32 @test3a(i32 %v1) + %v3 = add i32 %v2, 1 + %v4 = call i32 @test3a(i32 %v3) + %c = icmp eq i32 %v4, %a + br i1 %c, label %exit, label %loop + +exit: + ret i32 %v4 +} diff --git a/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll b/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll --- a/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll +++ b/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll @@ -386,10 +386,7 @@ ; CHECK-NEXT: store i32 [[MUL]], i32* @pcount, align 4 ; CHECK-NEXT: ret void ; CHECK: if.end24: -; CHECK-NEXT: [[CMP25474:%.*]] = icmp sgt i32 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[CMP25474]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body: -; CHECK-NEXT: ret void +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll --- a/llvm/test/Transforms/SCCP/widening.ll +++ b/llvm/test/Transforms/SCCP/widening.ll @@ -17,10 +17,8 @@ ; SCCP: exit: ; SCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ] ; SCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; SCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; SCCP-NEXT: call void @use(i1 [[T_1]]) -; SCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; SCCP-NEXT: call void @use(i1 [[F_1]]) +; SCCP-NEXT: call void @use(i1 true) +; SCCP-NEXT: call void @use(i1 false) ; SCCP-NEXT: ret void ; ; IPSCCP-LABEL: @test_2_incoming_constants( @@ -32,10 +30,8 @@ ; IPSCCP: exit: ; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ] ; IPSCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; IPSCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) -; IPSCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; IPSCCP-NEXT: call void @use(i1 [[F_1]]) +; IPSCCP-NEXT: call void @use(i1 true) +; IPSCCP-NEXT: call void @use(i1 false) ; IPSCCP-NEXT: ret void ; entry: @@ -68,10 +64,8 @@ ; SCCP: exit: ; SCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ] ; SCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; SCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; SCCP-NEXT: call void @use(i1 [[T_1]]) -; SCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; SCCP-NEXT: call void @use(i1 [[F_1]]) +; SCCP-NEXT: call void @use(i1 true) +; SCCP-NEXT: call void @use(i1 false) ; SCCP-NEXT: ret void ; ; IPSCCP-LABEL: @test_3_incoming_constants( @@ -86,10 +80,8 @@ ; IPSCCP: exit: ; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ] ; IPSCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; IPSCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) -; IPSCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; IPSCCP-NEXT: call void @use(i1 [[F_1]]) +; IPSCCP-NEXT: call void @use(i1 true) +; IPSCCP-NEXT: call void @use(i1 false) ; IPSCCP-NEXT: ret void ; entry: @@ -132,10 +124,8 @@ ; SCCP: exit: ; SCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ], [ 3, [[BB3]] ], [ 4, [[BB4]] ] ; SCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; SCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; SCCP-NEXT: call void @use(i1 [[T_1]]) -; SCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; SCCP-NEXT: call void @use(i1 [[F_1]]) +; SCCP-NEXT: call void @use(i1 true) +; SCCP-NEXT: call void @use(i1 false) ; SCCP-NEXT: ret void ; ; IPSCCP-LABEL: @test_5_incoming_constants( @@ -156,10 +146,8 @@ ; IPSCCP: exit: ; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ], [ 3, [[BB3]] ], [ 4, [[BB4]] ] ; IPSCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; IPSCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) -; IPSCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; IPSCCP-NEXT: call void @use(i1 [[F_1]]) +; IPSCCP-NEXT: call void @use(i1 true) +; IPSCCP-NEXT: call void @use(i1 false) ; IPSCCP-NEXT: ret void ; entry: @@ -369,8 +357,7 @@ ; IPSCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 2 ; IPSCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] ; IPSCCP: loop.body: -; IPSCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 2 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: call void @use(i1 true) ; IPSCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; IPSCCP-NEXT: br label [[LOOP_HEADER]] ; IPSCCP: exit: @@ -418,8 +405,7 @@ ; IPSCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 200 ; IPSCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] ; IPSCCP: loop.body: -; IPSCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 200 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: call void @use(i1 true) ; IPSCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; IPSCCP-NEXT: br label [[LOOP_HEADER]] ; IPSCCP: exit: