Index: llvm/trunk/include/llvm/IR/ConstantRange.h =================================================================== --- llvm/trunk/include/llvm/IR/ConstantRange.h +++ llvm/trunk/include/llvm/IR/ConstantRange.h @@ -82,6 +82,16 @@ static ConstantRange makeSatisfyingICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other); + /// Produce the exact range such that all values in the returned range satisfy + /// the given predicate with any value contained within Other. Formally, this + /// returns the exact answer when the superset of 'union over all y in Other + /// is exactly same as the subset of intersection over all y in Other. + /// { x : icmp op x y is true}'. + /// + /// Example: Pred = ult and Other = i8 3 returns [0, 3) + static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, + const APInt &Other); + /// Return the largest range containing all X such that "X BinOpC Y" is /// guaranteed not to wrap (overflow) for all Y in Other. /// Index: llvm/trunk/lib/IR/ConstantRange.cpp =================================================================== --- llvm/trunk/lib/IR/ConstantRange.cpp +++ llvm/trunk/lib/IR/ConstantRange.cpp @@ -127,6 +127,18 @@ .inverse(); } +ConstantRange ConstantRange::makeExactICmpRegion(CmpInst::Predicate Pred, + const APInt &C) { + // Computes the exact range that is equal to both the constant ranges returned + // by makeAllowedICmpRegion and makeSatisfyingICmpRegion. This is always true + // when RHS is a singleton such as an APInt and so the assert is valid. + // However for non-singleton RHS, for example ult [2,5) makeAllowedICmpRegion + // returns [0,4) but makeSatisfyICmpRegion returns [0,2). + // + assert(makeAllowedICmpRegion(Pred, C) == makeSatisfyingICmpRegion(Pred, C)); + return makeAllowedICmpRegion(Pred, C); +} + ConstantRange ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, const ConstantRange &Other, Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -114,6 +114,15 @@ IsSigned); } +/// Given an icmp instruction, return true if any use of this comparison is a +/// branch on sign bit comparison. +static bool isBranchOnSignBitCheck(ICmpInst &I, bool isSignBit) { + for (auto *U : I.users()) + if (isa(U)) + return isSignBit; + return false; +} + /// isSignBitCheck - Given an exploded icmp instruction, return true if the /// comparison only checks the sign bit. If it only checks the sign bit, set /// TrueIfSigned if the result of the comparison is true when the input value is @@ -3284,6 +3293,42 @@ // bits, if it is a sign bit comparison, it only demands the sign bit. bool UnusedBit; isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); + + // Canonicalize icmp instructions based on dominating conditions. + BasicBlock *Parent = I.getParent(); + BasicBlock *Dom = Parent->getSinglePredecessor(); + auto *BI = Dom ? dyn_cast(Dom->getTerminator()) : nullptr; + ICmpInst::Predicate Pred; + BasicBlock *TrueBB, *FalseBB; + ConstantInt *CI2; + if (BI && match(BI, m_Br(m_ICmp(Pred, m_Specific(Op0), m_ConstantInt(CI2)), + TrueBB, FalseBB)) && + TrueBB != FalseBB) { + ConstantRange CR = ConstantRange::makeAllowedICmpRegion(I.getPredicate(), + CI->getValue()); + ConstantRange DominatingCR = + (Parent == TrueBB) + ? ConstantRange::makeExactICmpRegion(Pred, CI2->getValue()) + : ConstantRange::makeExactICmpRegion( + CmpInst::getInversePredicate(Pred), CI2->getValue()); + ConstantRange Intersection = DominatingCR.intersectWith(CR); + ConstantRange Difference = DominatingCR.difference(CR); + if (Intersection.isEmptySet()) + return replaceInstUsesWith(I, Builder->getFalse()); + if (Difference.isEmptySet()) + return replaceInstUsesWith(I, Builder->getTrue()); + // Canonicalizing a sign bit comparison that gets used in a branch, + // pessimizes codegen by generating branch on zero instruction instead + // of a test and branch. So we avoid canonicalizing in such situations + // because test and branch instruction has better branch displacement + // than compare and branch instruction. + if (!isBranchOnSignBitCheck(I, isSignBit) && !I.isEquality()) { + if (auto *AI = Intersection.getSingleElement()) + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Builder->getInt(*AI)); + if (auto *AD = Difference.getSingleElement()) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Builder->getInt(*AD)); + } + } } // See if we can fold the comparison based on range information we can get Index: llvm/trunk/test/Transforms/InstCombine/icmp.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/icmp.ll +++ llvm/trunk/test/Transforms/InstCombine/icmp.ll @@ -1979,3 +1979,121 @@ ret i1 %cmp } +; CHECK-LABEL: @idom_sign_bit_check_edge_dominates +define void @idom_sign_bit_check_edge_dominates(i64 %a) { +entry: + %cmp = icmp slt i64 %a, 0 + br i1 %cmp, label %land.lhs.true, label %lor.rhs + +land.lhs.true: ; preds = %entry + br label %lor.end + +; CHECK-LABEL: lor.rhs: +; CHECK-NOT: icmp sgt i64 %a, 0 +; CHECK: icmp eq i64 %a, 0 +lor.rhs: ; preds = %entry + %cmp2 = icmp sgt i64 %a, 0 + br i1 %cmp2, label %land.rhs, label %lor.end + +land.rhs: ; preds = %lor.rhs + br label %lor.end + +lor.end: ; preds = %land.rhs, %lor.rhs, %land.lhs.true + ret void +} + +; CHECK-LABEL: @idom_sign_bit_check_edge_not_dominates +define void @idom_sign_bit_check_edge_not_dominates(i64 %a) { +entry: + %cmp = icmp slt i64 %a, 0 + br i1 %cmp, label %land.lhs.true, label %lor.rhs + +land.lhs.true: ; preds = %entry + br i1 undef, label %lor.end, label %lor.rhs + +; CHECK-LABEL: lor.rhs: +; CHECK: icmp sgt i64 %a, 0 +; CHECK-NOT: icmp eq i64 %a, 0 +lor.rhs: ; preds = %land.lhs.true, %entry + %cmp2 = icmp sgt i64 %a, 0 + br i1 %cmp2, label %land.rhs, label %lor.end + +land.rhs: ; preds = %lor.rhs + br label %lor.end + +lor.end: ; preds = %land.rhs, %lor.rhs, %land.lhs.true + ret void +} + +; CHECK-LABEL: @idom_sign_bit_check_edge_dominates_select +define void @idom_sign_bit_check_edge_dominates_select(i64 %a, i64 %b) { +entry: + %cmp = icmp slt i64 %a, 5 + br i1 %cmp, label %land.lhs.true, label %lor.rhs + +land.lhs.true: ; preds = %entry + br label %lor.end + +; CHECK-LABEL: lor.rhs: +; CHECK-NOT: [[B:%.*]] = icmp sgt i64 %a, 5 +; CHECK: [[C:%.*]] = icmp eq i64 %a, %b +; CHECK-NOT: [[D:%.*]] = select i1 [[B]], i64 %a, i64 5 +; CHECK-NOT: icmp ne i64 [[D]], %b +; CHECK-NEXT: br i1 [[C]], label %lor.end, label %land.rhs +lor.rhs: ; preds = %entry + %cmp2 = icmp sgt i64 %a, 5 + %select = select i1 %cmp2, i64 %a, i64 5 + %cmp3 = icmp ne i64 %select, %b + br i1 %cmp3, label %land.rhs, label %lor.end + +land.rhs: ; preds = %lor.rhs + br label %lor.end + +lor.end: ; preds = %land.rhs, %lor.rhs, %land.lhs.true + ret void +} + +; CHECK-LABEL: @idom_zbranch +define void @idom_zbranch(i64 %a) { +entry: + %cmp = icmp sgt i64 %a, 0 + br i1 %cmp, label %lor.end, label %lor.rhs + +; CHECK-LABEL: lor.rhs: +; CHECK: icmp slt i64 %a, 0 +; CHECK-NOT: icmp eq i64 %a, 0 +lor.rhs: ; preds = %entry + %cmp2 = icmp slt i64 %a, 0 + br i1 %cmp2, label %land.rhs, label %lor.end + +land.rhs: ; preds = %lor.rhs + br label %lor.end + +lor.end: ; preds = %land.rhs, %lor.rhs + ret void +} + +; CHECK-LABEL: @idom_not_zbranch +define void @idom_not_zbranch(i32 %a, i32 %b) { +entry: + %cmp = icmp sgt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +; CHECK-LABEL: if.end: +; CHECK-NOT: [[B:%.*]] = icmp slt i32 %a, 0 +; CHECK: [[C:%.*]] = icmp eq i32 %a, %b +; CHECK-NOT: [[D:%.*]] = select i1 [[B]], i32 %a, i32 0 +; CHECK-NOT: icmp ne i32 [[D]], %b +; CHECK-NEXT: br i1 [[C]], label %return, label %if.then3 +if.end: ; preds = %entry + %cmp1 = icmp slt i32 %a, 0 + %a. = select i1 %cmp1, i32 %a, i32 0 + %cmp2 = icmp ne i32 %a., %b + br i1 %cmp2, label %if.then3, label %return + +if.then3: ; preds = %if.end + br label %return + +return: ; preds = %if.end, %entry, %if.then3 + ret void +}