Index: lib/Analysis/LazyValueInfo.cpp =================================================================== --- lib/Analysis/LazyValueInfo.cpp +++ lib/Analysis/LazyValueInfo.cpp @@ -342,6 +342,21 @@ /// recursive value lookup process. std::stack > BlockValueStack; + /// BlockValueSet - Keeps track of which block-value pairs are in + /// BlockValueStack. + DenseSet > BlockValueSet; + + /// pushBlockValue - Push BV onto BlockValueStack unless it's already in + /// there. Returns true on success. + bool pushBlockValue(const std::pair &BV) { + if (BlockValueSet.count(BV)) + return false; // It's already in the stack. + + BlockValueStack.push(BV); + BlockValueSet.insert(BV); + return true; + } + /// A pointer to the cache of @llvm.assume calls. AssumptionTracker *AT; /// An optional DL pointer. @@ -350,27 +365,13 @@ DominatorTree *DT; friend struct LVIValueHandle; - - /// OverDefinedCacheUpdater - A helper object that ensures that the - /// OverDefinedCache is updated whenever solveBlockValue returns. - struct OverDefinedCacheUpdater { - LazyValueInfoCache *Parent; - Value *Val; - BasicBlock *BB; - LVILatticeVal &BBLV; - - OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV, - LazyValueInfoCache *P) - : Parent(P), Val(V), BB(B), BBLV(LV) { } - - bool markResult(bool changed) { - if (changed && BBLV.isOverdefined()) - Parent->OverDefinedCache.insert(std::make_pair(BB, Val)); - return changed; - } - }; - + void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { + SeenBlocks.insert(BB); + lookup(Val)[BB] = Result; + if (Result.isOverdefined()) + OverDefinedCache.insert(std::make_pair(BB, Val)); + } LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, @@ -472,9 +473,18 @@ void LazyValueInfoCache::solve() { while (!BlockValueStack.empty()) { std::pair &e = BlockValueStack.top(); + assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!"); + if (solveBlockValue(e.second, e.first)) { - assert(BlockValueStack.top() == e); + // The work item was completely processed. + assert(BlockValueStack.top() == e && "Nothing should have been pushed!"); + assert(lookup(e.second).count(e.first) && "Result should be in cache!"); + BlockValueStack.pop(); + BlockValueSet.erase(e); + } else { + // More work needs to be done before revisiting. + assert(BlockValueStack.top() != e && "Stack should have been pushed!"); } } } @@ -504,43 +514,40 @@ if (isa(Val)) return true; - ValueCacheEntryTy &Cache = lookup(Val); - SeenBlocks.insert(BB); - LVILatticeVal &BBLV = Cache[BB]; - - // OverDefinedCacheUpdater is a helper object that will update - // the OverDefinedCache for us when this method exits. Make sure to - // call markResult on it as we exit, passing a bool to indicate if the - // cache needs updating, i.e. if we have solved a new value or not. - OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this); - - if (!BBLV.isUndefined()) { - DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n'); - - // Since we're reusing a cached value here, we don't need to update the - // OverDefinedCache. The cache will have been properly updated - // whenever the cached value was inserted. - ODCacheUpdater.markResult(false); + if (lookup(Val).count(BB)) { + // If we have a cached value, use that. + DEBUG(dbgs() << " reuse BB '" << BB->getName() + << "' val=" << lookup(Val)[BB] << '\n'); + + // Since we're reusing a cached value, we don't need to update the + // OverDefinedCache. The cache will have been properly updated whenever the + // cached value was inserted. return true; } - // Otherwise, this is the first time we're seeing this block. Reset the - // lattice value to overdefined, so that cycles will terminate and be - // conservatively correct. - BBLV.markOverdefined(); + // Hold off inserting this value into the Cache in case we have to return + // false and come back later. + LVILatticeVal Res; Instruction *BBI = dyn_cast(Val); if (!BBI || BBI->getParent() != BB) { - return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB)); + if (!solveBlockValueNonLocal(Res, Val, BB)) + return false; + insertResult(Val, BB, Res); + return true; } if (PHINode *PN = dyn_cast(BBI)) { - return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB)); + if (!solveBlockValuePHINode(Res, PN, BB)) + return false; + insertResult(Val, BB, Res); + return true; } if (AllocaInst *AI = dyn_cast(BBI)) { - BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); - return ODCacheUpdater.markResult(true); + Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); + insertResult(Val, BB, Res); + return true; } // We can only analyze the definitions of certain classes of instructions @@ -550,8 +557,9 @@ !BBI->getType()->isIntegerTy()) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because inst def found.\n"); - BBLV.markOverdefined(); - return ODCacheUpdater.markResult(true); + Res.markOverdefined(); + insertResult(Val, BB, Res); + return true; } // FIXME: We're currently limited to binops with a constant RHS. This should @@ -561,11 +569,15 @@ DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because inst def found.\n"); - BBLV.markOverdefined(); - return ODCacheUpdater.markResult(true); + Res.markOverdefined(); + insertResult(Val, BB, Res); + return true; } - return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB)); + if (!solveBlockValueConstantRange(Res, BBI, BB)) + return false; + insertResult(Val, BB, Res); + return true; } static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { @@ -745,8 +757,10 @@ BasicBlock *BB) { // Figure out the range of the LHS. If that fails, bail. if (!hasBlockValue(BBI->getOperand(0), BB)) { - BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0))); - return false; + if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0)))) + return false; + BBLV.markOverdefined(); + return true; } LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); @@ -940,8 +954,10 @@ // LVI better supports recursive values. Even for the single value case, we // can intersect to detect dead code (an empty range). if (!hasBlockValue(Val, BBFrom)) { - BlockValueStack.push(std::make_pair(BBFrom, Val)); - return false; + if (pushBlockValue(std::make_pair(BBFrom, Val))) + return false; + Result.markOverdefined(); + return true; } // Try to intersect ranges of the BB and the constraint on the edge. @@ -960,8 +976,10 @@ } if (!hasBlockValue(Val, BBFrom)) { - BlockValueStack.push(std::make_pair(BBFrom, Val)); - return false; + if (pushBlockValue(std::make_pair(BBFrom, Val))) + return false; + Result.markOverdefined(); + return true; } // If we couldn't compute the value on the edge, use the value from the BB. @@ -984,7 +1002,9 @@ DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); - BlockValueStack.push(std::make_pair(BB, V)); + assert(BlockValueStack.empty() && BlockValueSet.empty()); + pushBlockValue(std::make_pair(BB, V)); + solve(); LVILatticeVal Result = getBlockValue(V, BB); mergeAssumeBlockValueConstantRange(V, Result, CxtI); Index: test/Transforms/CorrelatedValuePropagation/icmp.ll =================================================================== --- /dev/null +++ test/Transforms/CorrelatedValuePropagation/icmp.ll @@ -0,0 +1,63 @@ +; RUN: opt -correlated-propagation -S %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; Function Attrs: noreturn +declare void @check1(i1) #1 + +; Function Attrs: noreturn +declare void @check2(i1) #1 + +; Make sure we propagate the value of %tmp35 to the true/false cases +; CHECK-LABEL: @test1 +; CHECK: call void @check1(i1 false) +; CHECK: call void @check2(i1 true) +define void @test1(i64 %tmp35) { +bb: + %tmp36 = icmp sgt i64 %tmp35, 0 + br i1 %tmp36, label %bb_true, label %bb_false + +bb_true: + %tmp47 = icmp slt i64 %tmp35, 0 + tail call void @check1(i1 %tmp47) #4 + unreachable + +bb_false: + %tmp48 = icmp sle i64 %tmp35, 0 + tail call void @check2(i1 %tmp48) #4 + unreachable +} + +; Function Attrs: noreturn +; This is the same as test1 but with a diamond to ensure we +; get %tmp36 from both true and false BBs. +; CHECK-LABEL: @test2 +; CHECK: call void @check1(i1 false) +; CHECK: call void @check2(i1 true) +define void @test2(i64 %tmp35, i1 %inner_cmp) { +bb: + %tmp36 = icmp sgt i64 %tmp35, 0 + br i1 %tmp36, label %bb_true, label %bb_false + +bb_true: + br i1 %inner_cmp, label %inner_true, label %inner_false + +inner_true: + br label %merge + +inner_false: + br label %merge + +merge: + %tmp47 = icmp slt i64 %tmp35, 0 + tail call void @check1(i1 %tmp47) #0 + unreachable + +bb_false: + %tmp48 = icmp sle i64 %tmp35, 0 + tail call void @check2(i1 %tmp48) #4 + unreachable +} + +attributes #4 = { noreturn } Index: test/Transforms/JumpThreading/conservative-lvi.ll =================================================================== --- /dev/null +++ test/Transforms/JumpThreading/conservative-lvi.ll @@ -0,0 +1,58 @@ +; RUN: opt -jump-threading -S %s | FileCheck %s + +; Check that we thread arg2neg -> checkpos -> end. +; +; LazyValueInfo would previously fail to analyze the value of %arg in arg2neg +; because its predecessing blocks (checkneg) hadn't been processed yet (PR21238) + +; CHECK-LABEL: @test_jump_threading +; CHECK: arg2neg: +; CHECK-NEXT: br i1 %arg1, label %end, label %checkpos.thread +; CHECK: checkpos.thread: +; CHECK-NEXT: br label %end + +define i32 @test_jump_threading(i1 %arg1, i32 %arg2) { +checkneg: + %cmp = icmp slt i32 %arg2, 0 + br i1 %cmp, label %arg2neg, label %checkpos + +arg2neg: + br i1 %arg1, label %end, label %checkpos + +checkpos: + %cmp2 = icmp sgt i32 %arg2, 0 + br i1 %cmp2, label %arg2pos, label %end + +arg2pos: + br label %end + +end: + %0 = phi i32 [ 1, %arg2neg ], [ 2, %checkpos ], [ 3, %arg2pos ] + ret i32 %0 +} + + +; arg2neg has an edge back to itself. If LazyValueInfo is not careful when +; visiting predecessors, it could get into an infinite loop. + +; CHECK-LABEL: test_infinite_loop + +define i32 @test_infinite_loop(i1 %arg1, i32 %arg2) { +checkneg: + %cmp = icmp slt i32 %arg2, 0 + br i1 %cmp, label %arg2neg, label %checkpos + +arg2neg: + br i1 %arg1, label %arg2neg, label %checkpos + +checkpos: + %cmp2 = icmp sgt i32 %arg2, 0 + br i1 %cmp2, label %arg2pos, label %end + +arg2pos: + br label %end + +end: + %0 = phi i32 [ 2, %checkpos ], [ 3, %arg2pos ] + ret i32 %0 +}