Index: lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCompares.cpp +++ lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1626,20 +1626,45 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, BinaryOperator *And, const APInt &C1) { + bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE; + // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1 // TODO: We canonicalize to the longer form for scalars because we have // better analysis/folds for icmp, and codegen may be better with icmp. - if (Cmp.getPredicate() == CmpInst::ICMP_NE && Cmp.getType()->isVectorTy() && - C1.isNullValue() && match(And->getOperand(1), m_One())) + if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isNullValue() && + match(And->getOperand(1), m_One())) return new TruncInst(And->getOperand(0), Cmp.getType()); const APInt *C2; - if (!match(And->getOperand(1), m_APInt(C2))) + Value *X; + if (!match(And, m_And(m_Value(X), m_APInt(C2)))) return nullptr; if (!And->hasOneUse()) return nullptr; + // If X is shift instruction, codegen is better when fold + // (V0 << V1) & signbit != 0 into (V0 << V1) s< 0 + // rather than (V0 & (signbit >> V1)) != 0; + // and fold (V0 << V1) & ~C == 0, C+1 is power of 2 into + // (V0 << V1) < C+1, rather than (V0 & (~C >> V1)) == 0. + if (Cmp.isEquality() && C1.isNullValue()) { + // Replace (and X, (1 << size(X)-1) != 0) with X s< 0 + if (C2->isSignMask()) { + Constant *Zero = Constant::getNullValue(X->getType()); + auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; + return new ICmpInst(NewPred, X, Zero); + } + + // ((X & ~7) == 0) --> X < 8 + if ((~(*C2) + 1).isPowerOf2()) { + Constant *NegBOC = + ConstantExpr::getNeg(cast(And->getOperand(1))); + auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; + return new ICmpInst(NewPred, X, NegBOC); + } + } + // If the LHS is an 'and' of a truncate and we can widen the and/compare to // the input width without changing the value produced, eliminate the cast: // @@ -1759,6 +1784,29 @@ } } + // (V0 & (signbit l>> V1)) ==/!= 0 -> (V0 << V1) >=/< 0 + // (V0 & (signbit << V1)) ==/!= 0 -> (V0 l>> V1) >=/< 0 + Value *V0, *V1, *Shift, *Zero; + ICmpInst::Predicate Pred; + if (match(&Cmp, + m_ICmp(Pred, + m_OneUse(m_c_And( + m_CombineAnd( + m_CombineAnd(m_Shift(m_SignMask(), m_Value(V1)), + m_Value(Shift)), + m_CombineOr(m_Shl(m_Value(), m_Value()), + m_LShr(m_Value(), m_Value()))), + m_OneUse(m_Value(V0)))), + m_CombineAnd(m_Zero(), m_Value(Zero)))) && + Cmp.isEquality(Pred)) { + Value *NewShift = cast(Shift)->getOpcode() == Instruction::LShr + ? Builder.CreateShl(V0, V1) + : Builder.CreateLShr(V0, V1); + ICmpInst::Predicate NewPred = + Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT; + return new ICmpInst(NewPred, NewShift, Zero); + } + return nullptr; } @@ -2783,24 +2831,6 @@ if (C == *BOC && C.isPowerOf2()) return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, BO, Constant::getNullValue(RHS->getType())); - - // Don't perform the following transforms if the AND has multiple uses - if (!BO->hasOneUse()) - break; - - // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 - if (BOC->isSignMask()) { - Constant *Zero = Constant::getNullValue(BOp0->getType()); - auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; - return new ICmpInst(NewPred, BOp0, Zero); - } - - // ((X & ~7) == 0) --> X < 8 - if (C.isNullValue() && (~(*BOC) + 1).isPowerOf2()) { - Constant *NegBOC = ConstantExpr::getNeg(cast(BOp1)); - auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; - return new ICmpInst(NewPred, BOp0, NegBOC); - } } break; } Index: test/Transforms/InstCombine/icmp-shift-and-signbit.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/icmp-shift-and-signbit.ll @@ -0,0 +1,69 @@ +; RUN: opt %s -instcombine -S | FileCheck %s + + +; ((X << Y) & signbit) ==/!= 0 -> (X << Y) >=/< 0 +; CHECK-LABEL: shl-and-signbit +; CHECK: [[SHL:%.*]] = shl i32 %x, %y +; CHECK-NEXT icmp slt i32 [[SHL]], 0 +define dso_local i32 @shl-and-signbit(i32 %x, i32 %y, i32 %a, i32 %b) { +entry: + %shl = shl i32 %x, %y + %and = and i32 %shl, -2147483648 + %tobool = icmp ne i32 %and, 0 + %selv = select i1 %tobool, i32 %a, i32 %b + ret i32 %selv +} + +; ((X << Y) & ~C) ==/!= 0 -> (X << Y) = C+1; C+1 is power of 2 +; CHECK-LABEL: shl-and-negC +; CHECK: [[SHL:%.*]] = shl i32 %x, %y +; CHECK-NEXT icmp ugt i32 [[SHL]], 7 +define dso_local i32 @shl-and-negC(i32 %x, i32 %y, i32 %a, i32 %b) { +entry: + %shl = shl i32 %x, %y + %and = and i32 %shl, 4294967288 ; ~7 + %tobool = icmp ne i32 %and, 0 + %selv = select i1 %tobool, i32 %a, i32 %b + ret i32 %selv +} + +; ((X l>> Y) & ~C) ==/!= 0 -> (X l>> Y) = C+1; C+1 is power of 2 +; CHECK-LABEL: lshr-and-negC +; CHECK: [[LSHR:%.*]] = lshr i32 %x, %y +; CHECK-NEXT icmp ult i32 [[LSHR]], 8 +define dso_local i32 @lshr-and-negC(i32 %x, i32 %y, i32 %a, i32 %b) { +entry: + %shl = lshr i32 %x, %y + %and = and i32 %shl, 4294967288 ; ~7 + %tobool = icmp eq i32 %and, 0 + %selv = select i1 %tobool, i32 %a, i32 %b + ret i32 %selv +} + +; (X & (signbit l>> Y)) ==/!= 0 -> (X << Y) >=/< 0 +; CHECK-LABEL: signbit-lshr-and +; CHECK: [[SHL:%.*]] = shl i32 %x, %y +; CHECK-NEXT icmp slt i32 [[SHL]], 0 +define dso_local i32 @signbit-lshr-and(i32 %x, i32 %y, i32 %a, i32 %b) { +entry: + %shr = lshr i32 -2147483648, %y + %and = and i32 %shr, %x + %tobool = icmp ne i32 %and, 0 + %selv = select i1 %tobool, i32 %a, i32 %b + ret i32 %selv +} + +; (X & (signbit << Y)) ==/!= 0 -> (X l>> Y) >=/< 0 +; CHECK-LABEL: signbit-shl-and +; CHECK: [[LSHR:%.*]] = lshr i32 %x, %y +; CHECK-NEXT icmp sgt i32 [[LSHR]], -1 +define dso_local i32 @signbit-shl-and(i32 %x, i32 %y, i32 %a, i32 %b) { +entry: + %shr = shl i32 -2147483648, %y + %and = and i32 %shr, %x + %tobool = icmp eq i32 %and, 0 + %selv = select i1 %tobool, i32 %a, i32 %b + ret i32 %selv +} + + Index: test/Transforms/InstCombine/pr17827.ll =================================================================== --- test/Transforms/InstCombine/pr17827.ll +++ test/Transforms/InstCombine/pr17827.ll @@ -66,8 +66,8 @@ ; Unsigned compare allows a transformation to compare against 0. define i1 @test_shift_and_cmp_changed2(i8 %p) { ; CHECK-LABEL: @test_shift_and_cmp_changed2( -; CHECK-NEXT: [[ANDP:%.*]] = and i8 %p, 6 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ANDP]], 0 +; CHECK-NEXT: [[SHLP:%.*]] = shl i8 %p, 5 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[SHLP]], 64 ; CHECK-NEXT: ret i1 [[CMP]] ; %shlp = shl i8 %p, 5 @@ -78,8 +78,8 @@ define <2 x i1> @test_shift_and_cmp_changed2_vec(<2 x i8> %p) { ; CHECK-LABEL: @test_shift_and_cmp_changed2_vec( -; CHECK-NEXT: [[ANDP:%.*]] = and <2 x i8> %p, -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ANDP]], zeroinitializer +; CHECK-NEXT: [[SHLP:%.*]] = shl <2 x i8> %p, +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[SHLP]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shlp = shl <2 x i8> %p,