Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -39,6 +39,33 @@ const unsigned MaxDepth = 6; +/// Enable an experimental feature to leverage information about dominating +/// conditions to compute known bits. The individual options below control how +/// hard we search. The defaults are choosen to be fairly aggressive. If you +/// run into compile time problems when testing, scale them back and report +/// your findings. +static cl::opt EnableDomConditions("value-tracking-dom-conditions", + cl::Hidden, cl::init(false)); + +// This is expensive, so we only do it for the top level query value. +// (TODO: evaluate cost vs profit, consider higher thresholds) +static cl::opt DomConditionsMaxDepth("dom-conditions-max-depth", + cl::Hidden, cl::init(1)); + +/// How many dominating blocks should be scanned looking for dominating +/// conditions? +static cl::opt DomConditionsMaxDomBlocks("dom-conditions-dom-blocks", + cl::Hidden, cl::init(20000)); + +// Controls the number of uses of the value searched for possible +// dominating comparisons. +static cl::opt DomConditionsMaxUses("dom-conditions-max-uses", + cl::Hidden, cl::init(2000)); + +// If true, don't consider only compares whose only use is a branch. +static cl::opt DomConditionsSingleCmpUse("dom-conditions-single-cmp-use", + cl::Hidden, cl::init(false)); + /// Returns the bitwidth of the given scalar or pointer type (if unknown returns /// 0). For vector types, returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { @@ -474,6 +501,186 @@ return m_CombineOr(m_Xor(L, R), m_Xor(R, L)); } + +/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is +/// true (at the context instruction.) This is mostly a utility function for +/// the prototype dominating conditions reasoning below. +static void computeKnownBitsFromTrueCondition(Value *V, + ICmpInst *Cmp, + APInt &KnownZero, + APInt &KnownOne, + const DataLayout *TD, + unsigned Depth, + const Query &Q) { + Value *LHS = Cmp->getOperand(0); + Value *RHS = Cmp->getOperand(1); + // TODO: We could potentially be more aggressive here. This would be worth + // evaluating. If we can, explore commoning this code with the assume + // handling logic. + if (LHS != V && RHS != V) + return; + + const unsigned BitWidth = KnownZero.getBitWidth(); + + switch (Cmp->getPredicate()) { + default: + // We know nothing from this condition + break; + // TODO: implement unsigned bound from below (known one bits) + // TODO: common condition check implementations with assumes + // TODO: implement other patterns from assume (e.g. V & B == A) + case ICmpInst::ICMP_SGT: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, TD, Depth+1, Q); + if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) { + // We know that the sign bit is zero. + KnownZero |= APInt::getSignBit(BitWidth); + } + } + break; + case ICmpInst::ICMP_EQ: + if (LHS == V) + computeKnownBits(RHS, KnownZero, KnownOne, TD, Depth+1, Q); + else if (RHS == V) + computeKnownBits(LHS, KnownZero, KnownOne, TD, Depth+1, Q); + else + llvm_unreachable("missing use?"); + break; + case ICmpInst::ICMP_ULE: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, TD, Depth+1, Q); + // The known zero bits carry over + unsigned SignBits = KnownZeroTemp.countLeadingOnes(); + KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits); + } + break; + case ICmpInst::ICMP_ULT: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, TD, Depth+1, Q); + // Whatever high bits in rhs are zero are known to be zero (if rhs is a + // power of 2, then one more). + unsigned SignBits = KnownZeroTemp.countLeadingOnes(); + if (isKnownToBeAPowerOfTwo(RHS, false, Depth+1, Query(Q, Cmp))) + SignBits++; + KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits); + } + break; + }; +} + +/// Compute known bits in 'V' from conditions which are known to be true along +/// all paths leading to the context instruction. In particular, look for +/// cases where one branch of an interesting condition dominates the context +/// instruction. This does not do general dataflow. +/// NOTE: This code is EXPERIMENTAL and currently off by default. +static void computeKnownBitsFromDominatingCondition(Value *V, + APInt &KnownZero, + APInt &KnownOne, + const DataLayout *TD, + unsigned Depth, + const Query &Q) { + // Need both the dominator tree and the query location to do anything useful + if (!Q.DT || !Q.CxtI) + return; + Instruction *Cxt = const_cast(Q.CxtI); + + // Avoid useless work + if (auto VI = dyn_cast(V)) + if (VI->getParent() == Cxt->getParent()) + return; + + // Note: We currently implement two options. It's not clear which of these + // will survive long term, we need data for that. + // Option 1 - Try walking the dominator tree looking for conditions which + // might apply. This works well for local conditions (loop guards, etc..), + // but not as well for things far from the context instruction (presuming a + // low max blocks explored). If we can set an high enough limit, this would + // be all we need. + // Option 2 - We restrict out search to those conditions which are uses of the + // value we're interested in. This is independent of dom structure, but is + // slightly less powerful without looking through lots of use chains. It + // does handle conditions far from the context instruction (e.g. early + // function exits on entry) really well though. + + // Option 1 - Search the dom tree + unsigned NumBlocksExplored = 0; + BasicBlock *Current = Cxt->getParent(); + while (true) { + // Stop searching if we've gone too far up the chain + if (NumBlocksExplored >= DomConditionsMaxDomBlocks) + break; + NumBlocksExplored++; + + if (!Q.DT->getNode(Current)->getIDom()) + break; + Current = Q.DT->getNode(Current)->getIDom()->getBlock(); + if (!Current) + // found function entry + break; + + BranchInst *BI = dyn_cast(Current->getTerminator()); + if (!BI || + BI->isUnconditional()) + continue; + ICmpInst *Cmp = dyn_cast(BI->getCondition()); + if (!Cmp) + continue; + + // We're looking for conditions that are guaranteed to hold at the context + // instruction. Finding a condition where one path dominates the context + // isn't enough because both the true and false cases could merge before + // the context instruction we're actually interested in. Instead, we need + // to ensure that the taken *edge* dominates the context instruction. + BasicBlock *BB0 = BI->getSuccessor(0); + if (!Q.DT->dominates(BasicBlockEdge(BI->getParent(), BB0), + Q.CxtI->getParent())) + return; + + computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, TD, Depth, + Q); + } + + + // Option 2 - Search the other uses of V + unsigned NumUsesExplored = 0; + for (auto U : V->users()) { + // Avoid massive lists + if (NumUsesExplored >= DomConditionsMaxUses) + break; + NumUsesExplored++; + // Consider only compare instructions uniquely controlling a branch + ICmpInst *Cmp = dyn_cast(U); + if (!Cmp) + continue; + + if (DomConditionsSingleCmpUse && + !Cmp->hasOneUse()) + continue; + + for (auto *CmpU : Cmp->users()) { + BranchInst *BI = dyn_cast(CmpU); + if (!BI || + BI->isUnconditional()) + continue; + // We're looking for conditions that are guaranteed to hold at the context + // instruction. Finding a condition where one path dominates the context + // isn't enough because both the true and false cases could merge before + // the context instruction we're actually interested in. Instead, we need + // to ensure that the taken *edge* dominates the context instruction. + BasicBlock *BB0 = BI->getSuccessor(0); + if (!Q.DT->dominates(BasicBlockEdge(BI->getParent(), BB0), + Q.CxtI->getParent())) + return; + + computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, TD, Depth, + Q); + } + } +} + static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout *DL, @@ -839,6 +1046,12 @@ // Don't give up yet... there might be an assumption that provides more // information... computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q); + + // Or a dominating condition for that matter + if(EnableDomConditions && Depth <= DomConditionsMaxDepth) { + computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, + TD, Depth, Q); + } return; } @@ -860,6 +1073,13 @@ // Check whether a nearby assume intrinsic can determine some known bits. computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q); + // Check whether there's a dominating condition which implies something about + // this value at the given context. + if(EnableDomConditions && Depth <= DomConditionsMaxDepth) { + computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, + TD, Depth, Q); + } + Operator *I = dyn_cast(V); if (!I) return; Index: test/Transforms/InstCombine/dom-conditions.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/dom-conditions.ll @@ -0,0 +1,105 @@ +; RUN: opt -instcombine -value-tracking-dom-conditions=1 -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +define i1 @test_cmp_ult(i64 %A) { +; CHECK-LABEL: @test_cmp_ult +entry: + %cmp = icmp ult i64 %A, 64 + br i1 %cmp, label %taken, label %untaken + +taken: +; CHECK-LABEL: taken: +; CHECK-NEXT: ret i1 false + %cmp2 = icmp ugt i64 %A, 64 + ret i1 %cmp2 +untaken: + ret i1 true +} + +define i1 @test_cmp_ule(i64 %A) { +; CHECK-LABEL: @test_cmp_ule +entry: + %cmp = icmp ule i64 %A, 64 + br i1 %cmp, label %taken, label %untaken + +taken: +; CHECK-LABEL: taken: +; CHECK-NEXT: ret i1 false + %cmp2 = icmp ugt i64 %A, 128 + ret i1 %cmp2 +untaken: + ret i1 true +} + +define i1 @test_cmp_sgt(i32 %A) { +; CHECK-LABEL: @test_cmp_sgt +entry: + %cmp = icmp sgt i32 %A, 10 + br i1 %cmp, label %taken, label %untaken + +taken: +; CHECK-LABEL: taken: +; CHECK-NEXT: ret i1 true + %cmp2 = icmp sgt i32 %A, -1 + ret i1 %cmp2 +untaken: + ret i1 true +} + + + +define i64 @test_add_zero_bits(i64 %A) { +; CHECK-LABEL: @test_add_zero_bits +entry: + %cmp = icmp eq i64 %A, 2 + br i1 %cmp, label %taken, label %untaken + +taken: +; CHECK-LABEL: taken: +; CHECK-NEXT: %add = or i64 %A, 1 +; CHECK-NEXT: ret i64 %add + %add = add i64 %A, 1 + ret i64 %add +untaken: + ret i64 %A +} + +define i64 @test_add_nsw(i64 %A) { +; CHECK-LABEL: @test_add_nsw +entry: + %cmp = icmp ult i64 %A, 20 + br i1 %cmp, label %taken, label %untaken + +taken: +; CHECK-LABEL: taken: +; CHECK-NEXT: %add = add nuw nsw i64 %A, 1 +; CHECK-NEXT: ret i64 %add + %add = add i64 %A, 1 + ret i64 %add +untaken: + ret i64 %A +} + +; After sinking the instructions into the if block, check that we +; can simplify some of them using dominating conditions. +define i32 @test_add_zero_bits_sink(i32 %x) nounwind ssp { +; CHECK-LABEL: @test_add_zero_bits_sink( +; CHECK-NOT: sdiv i32 +entry: + %a = add nsw i32 %x, 16 ; [#uses=1] + %b = sdiv i32 %a, %x ; [#uses=1] + %cmp = icmp ult i32 %x, 7 ; [#uses=1] + br i1 %cmp, label %bb1, label %bb2 + +bb1: ; preds = %bb +; CHECK-LABEL: bb1: +; CHECK-NEXT: or i32 %x, 16 +; CHECK-NEXT: sdiv i32 + ret i32 %b + +bb2: ; preds = %bb, %bb1 + ret i32 %x +} + +declare i32 @bar()