Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -39,6 +39,23 @@ const unsigned MaxDepth = 6; +static cl::opt EnableDomConditions("value-tracking-dom-conditions", + cl::Hidden, cl::init(false)); + +// If true, don't consider only compares whose only use is a branch. +static cl::opt DomConditionsSingleCmpUse("dom-conditions-single-cmp-use", + cl::Hidden, cl::init(false)); + + +// Controls the number of uses of the value searched for possible +// dominating comparisons. +static cl::opt DomConditionsMaxUses("dom-conditions-max-uses", + cl::Hidden, cl::init(20)); +// This is expensive, so we only do it for the top level query value. +// (TODO: evaluate cost vs profit, consider higher thresholds) +static cl::opt DomConditionsMaxDepth("dom-conditions-max-depth", + cl::Hidden, cl::init(0)); + /// Returns the bitwidth of the given scalar or pointer type (if unknown returns /// 0). For vector types, returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { @@ -474,6 +491,113 @@ return m_CombineOr(m_Xor(L, R), m_Xor(R, L)); } + +static void computeKnownBitsFromDominatingCondition(Value *V, + APInt &KnownZero, + APInt &KnownOne, + const DataLayout *TD, + unsigned Depth, + const Query &Q) { + // Need both the dominator tree and the query location to do anything useful + if (!Q.DT || !Q.CxtI) + return; + Instruction *Cxt = const_cast(Q.CxtI); + + // Rather than walking the dominator tree looking for conditions which might + // apply, we restrict out search to those conditions which are uses of the + // value we're interested in. It's debatable which approach is + // better/faster, but this avoids work in cases where there are few uses of + // the value in question and a complex CFG. + + // Avoid useless work + if (auto VI = dyn_cast(V)) + if (VI->getParent() == Cxt->getParent()) + return; + + const unsigned BitWidth = KnownZero.getBitWidth(); + unsigned NumExplored = 0; + for (auto U : V->users()) { + // Avoid massive lists + if (NumExplored >= DomConditionsMaxUses) + break; + NumExplored++; + // Consider only compare instructions uniquely controlling a branch + ICmpInst *Cmp = dyn_cast(U); + if (!Cmp) + continue; + + if (DomConditionsSingleCmpUse && + !Cmp->hasOneUse()) + continue; + + for (auto *CmpU : Cmp->users()) { + BranchInst *BI = dyn_cast(CmpU); + if (!BI || + BI->isUnconditional()) + continue; + // We're looking for conditions that are guaranteed to hold at the context + // instruction. Finding a condition where one path dominates the context + // isn't enough because both the true and false cases could merge before + // the context instruction we're actually interested in. Instead, we need + // to ensure that the taken *edge* dominates the context instruction. + BasicBlock *BB0 = BI->getSuccessor(0); + if (!Q.DT->dominates(BasicBlockEdge(BI->getParent(), BB0), + Q.CxtI->getParent())) + return; + + Value *LHS = Cmp->getOperand(0); + Value *RHS = Cmp->getOperand(1); + switch (Cmp->getPredicate()) { + default: + // We know nothing from this condition + break; + // TODO: implement unsigned bound from below (known one bits) + // TODO: common condition check implementations with assumes + // TODO: implement other patterns from assume (e.g. V & B == A) + case ICmpInst::ICMP_SGT: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, TD, Depth+1, Q); + if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) { + // We know that the sign bit is zero. + KnownZero |= APInt::getSignBit(BitWidth); + } + } + break; + case ICmpInst::ICMP_EQ: + if (LHS == V) + computeKnownBits(RHS, KnownZero, KnownOne, TD, Depth+1, Q); + else if (RHS == V) + computeKnownBits(LHS, KnownZero, KnownOne, TD, Depth+1, Q); + else + llvm_unreachable("missing use?"); + break; + case ICmpInst::ICMP_ULE: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, TD, Depth+1, Q); + // The known zero bits carry over + unsigned SignBits = KnownZeroTemp.countLeadingOnes(); + KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits); + } + break; + case ICmpInst::ICMP_ULT: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, TD, Depth+1, Q); + // Whatever high bits in rhs are zero are known to be zero (if rhs is a + // power of 2, then one more). + unsigned SignBits = KnownZeroTemp.countLeadingOnes(); + if (isKnownToBeAPowerOfTwo(RHS, false, Depth+1, Query(Q, Cmp))) + SignBits++; + KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits); + } + break; + }; + } + } +} + static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout *DL, @@ -839,6 +963,12 @@ // Don't give up yet... there might be an assumption that provides more // information... computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q); + + // Or a dominating condition for that matter + if(EnableDomConditions && Depth <= DomConditionsMaxDepth) { + computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, + TD, Depth, Q); + } return; } @@ -860,6 +990,13 @@ // Check whether a nearby assume intrinsic can determine some known bits. computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q); + // Check whether there's a dominating condition which implies something about + // this value at the given context. + if(EnableDomConditions && Depth <= DomConditionsMaxDepth) { + computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, + TD, Depth, Q); + } + Operator *I = dyn_cast(V); if (!I) return; Index: test/Transforms/InstCombine/dom-conditions.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/dom-conditions.ll @@ -0,0 +1,105 @@ +; RUN: opt -instcombine -value-tracking-dom-conditions=1 -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +define i1 @test_cmp_ult(i64 %A) { +; CHECK-LABEL: @test_cmp_ult +entry: + %cmp = icmp ult i64 %A, 64 + br i1 %cmp, label %taken, label %untaken + +taken: +; CHECK-LABEL: taken: +; CHECK-NEXT: ret i1 false + %cmp2 = icmp ugt i64 %A, 64 + ret i1 %cmp2 +untaken: + ret i1 true +} + +define i1 @test_cmp_ule(i64 %A) { +; CHECK-LABEL: @test_cmp_ule +entry: + %cmp = icmp ule i64 %A, 64 + br i1 %cmp, label %taken, label %untaken + +taken: +; CHECK-LABEL: taken: +; CHECK-NEXT: ret i1 false + %cmp2 = icmp ugt i64 %A, 128 + ret i1 %cmp2 +untaken: + ret i1 true +} + +define i1 @test_cmp_sgt(i32 %A) { +; CHECK-LABEL: @test_cmp_sgt +entry: + %cmp = icmp sgt i32 %A, 10 + br i1 %cmp, label %taken, label %untaken + +taken: +; CHECK-LABEL: taken: +; CHECK-NEXT: ret i1 true + %cmp2 = icmp sgt i32 %A, -1 + ret i1 %cmp2 +untaken: + ret i1 true +} + + + +define i64 @test_add_zero_bits(i64 %A) { +; CHECK-LABEL: @test_add_zero_bits +entry: + %cmp = icmp eq i64 %A, 2 + br i1 %cmp, label %taken, label %untaken + +taken: +; CHECK-LABEL: taken: +; CHECK-NEXT: %add = or i64 %A, 1 +; CHECK-NEXT: ret i64 %add + %add = add i64 %A, 1 + ret i64 %add +untaken: + ret i64 %A +} + +define i64 @test_add_nsw(i64 %A) { +; CHECK-LABEL: @test_add_nsw +entry: + %cmp = icmp ult i64 %A, 20 + br i1 %cmp, label %taken, label %untaken + +taken: +; CHECK-LABEL: taken: +; CHECK-NEXT: %add = add nuw nsw i64 %A, 1 +; CHECK-NEXT: ret i64 %add + %add = add i64 %A, 1 + ret i64 %add +untaken: + ret i64 %A +} + +; After sinking the instructions into the if block, check that we +; can simplify some of them using dominating conditions. +define i32 @test_add_zero_bits_sink(i32 %x) nounwind ssp { +; CHECK-LABEL: @test_add_zero_bits_sink( +; CHECK-NOT: sdiv i32 +entry: + %a = add nsw i32 %x, 16 ; [#uses=1] + %b = sdiv i32 %a, %x ; [#uses=1] + %cmp = icmp ult i32 %x, 7 ; [#uses=1] + br i1 %cmp, label %bb1, label %bb2 + +bb1: ; preds = %bb +; CHECK-LABEL: bb1: +; CHECK-NEXT: or i32 %x, 16 +; CHECK-NEXT: sdiv i32 + ret i32 %b + +bb2: ; preds = %bb, %bb1 + ret i32 %x +} + +declare i32 @bar()