Index: lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1626,20 +1626,42 @@
 Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
                                                 BinaryOperator *And,
                                                 const APInt &C1) {
+  bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE;
+
   // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
   // TODO: We canonicalize to the longer form for scalars because we have
   // better analysis/folds for icmp, and codegen may be better with icmp.
-  if (Cmp.getPredicate() == CmpInst::ICMP_NE && Cmp.getType()->isVectorTy() &&
-      C1.isNullValue() && match(And->getOperand(1), m_One()))
+  if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isNullValue() &&
+      match(And->getOperand(1), m_One()))
     return new TruncInst(And->getOperand(0), Cmp.getType());
 
   const APInt *C2;
-  if (!match(And->getOperand(1), m_APInt(C2)))
+  Value *X;
+  if (!match(And, m_And(m_Value(X), m_APInt(C2))))
     return nullptr;
 
   if (!And->hasOneUse())
     return nullptr;
 
+  if (Cmp.isEquality() && C1.isNullValue()) {
+    // Replace (and X, (1 << size(X)-1) != 0) with X s< 0
+    if (C2->isSignMask()) {
+      Constant *Zero = Constant::getNullValue(X->getType());
+      auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+      return new ICmpInst(NewPred, X, Zero);
+    }
+
+    // ((X & ~7) == 0) --> X < 8
+    // If X is (BinOp Y, C3), allow other rules to fold C3 with C2.
+    if (!match(X, m_c_BinOp(m_Value(), m_Constant())) &&
+        (~(*C2) + 1).isPowerOf2()) {
+      Constant *NegBOC =
+          ConstantExpr::getNeg(cast<Constant>(And->getOperand(1)));
+      auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+      return new ICmpInst(NewPred, X, NegBOC);
+    }
+  }
+
   // If the LHS is an 'and' of a truncate and we can widen the and/compare to
   // the input width without changing the value produced, eliminate the cast:
   //
@@ -2783,24 +2805,6 @@
       if (C == *BOC && C.isPowerOf2())
         return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
                             BO, Constant::getNullValue(RHS->getType()));
-
-      // Don't perform the following transforms if the AND has multiple uses
-      if (!BO->hasOneUse())
-        break;
-
-      // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
-      if (BOC->isSignMask()) {
-        Constant *Zero = Constant::getNullValue(BOp0->getType());
-        auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
-        return new ICmpInst(NewPred, BOp0, Zero);
-      }
-
-      // ((X & ~7) == 0) --> X < 8
-      if (C.isNullValue() && (~(*BOC) + 1).isPowerOf2()) {
-        Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1));
-        auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
-        return new ICmpInst(NewPred, BOp0, NegBOC);
-      }
     }
     break;
   }
Index: test/Transforms/InstCombine/icmp-shift-and-negC.ll
===================================================================
--- test/Transforms/InstCombine/icmp-shift-and-negC.ll
+++ test/Transforms/InstCombine/icmp-shift-and-negC.ll
@@ -2,12 +2,14 @@
 ; RUN: opt %s -instcombine -S | FileCheck %s
 
-; FIXME: expect ((X << Y) & ~C) ==/!= 0 -> (X << Y) = C+1; C+1 is power of 2
+; Checking:
+; ((X << Y) & ~C) ==/!= 0 -> (X << Y) = C+1; C+1 is power of 2
+; ((X l>> Y) & ~C) ==/!= 0 -> (X l>> Y) = C+1; C+1 is power of 2
+
 define i1 @shl-and-negC(i32 %x, i32 %y) {
 ; CHECK-LABEL: @shl-and-negC(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 -8, [[Y:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i32 [[SHL]], 7
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %shl = shl i32 %x, %y
@@ -16,12 +18,10 
@@
   ret i1 %r
 }
 
-; FIXME: expect ((X l>> Y) & ~C) ==/!= 0 -> (X l>> Y) = C+1; C+1 is power of 2
 define i1 @lshr-and-negC(i32 %x, i32 %y) {
 ; CHECK-LABEL: @lshr-and-negC(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 -8, [[Y:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ult i32 [[LSHR]], 8
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %lshr = lshr i32 %x, %y
@@ -29,3 +29,106 @@
   %r = icmp eq i32 %and, 0
   ret i1 %r
 }
+
+define <2 x i1> @lshr-and-negC-vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @lshr-and-negC-vec(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i32> [[LSHR]], <i32 8, i32 8>
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %lshr = lshr <2 x i32> %x, %y
+  %and = and <2 x i32> %lshr, <i32 4294967288, i32 4294967288> ; ~7
+  %r = icmp eq <2 x i32> %and, <i32 0, i32 0>
+  ret <2 x i1> %r
+}
+
+define <3 x i1> @lshr-and-negC-vec-undef1(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @lshr-and-negC-vec-undef1(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and <3 x i32> [[LSHR]], <i32 -8, i32 -8, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <3 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[R]]
+;
+  %lshr = lshr <3 x i32> %x, %y
+  %and = and <3 x i32> %lshr, <i32 4294967288, i32 4294967288, i32 undef> ; ~7
+  %r = icmp eq <3 x i32> %and, <i32 0, i32 0, i32 0>
+  ret <3 x i1> %r
+}
+
+define <3 x i1> @lshr-and-negC-vec-undef2(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @lshr-and-negC-vec-undef2(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and <3 x i32> [[LSHR]], <i32 -8, i32 -8, i32 -8>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <3 x i32> [[AND]], <i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    ret <3 x i1> [[R]]
+;
+  %lshr = lshr <3 x i32> %x, %y
+  %and = and <3 x i32> %lshr, <i32 4294967288, i32 4294967288, i32 4294967288> ; ~7
+  %r = icmp eq <3 x i32> %and, <i32 0, i32 0, i32 undef>
+  ret <3 x i1> %r
+}
+
+define <3 x i1> @lshr-and-negC-vec-undef3(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @lshr-and-negC-vec-undef3(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and <3 x i32> [[LSHR]], <i32 -8, i32 -8, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <3 x i32> [[AND]], <i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    ret <3 x i1> [[R]]
+;
+  %lshr = lshr <3 x i32> %x, %y
+  %and = and <3 x i32> %lshr, <i32 4294967288, i32 4294967288, i32 undef> ; ~7
+  %r = icmp eq <3 x i32> %and, <i32 0, i32 0, i32 undef>
+  ret <3 x i1> %r
+}
+
+define i1 @shl-and-negC-extra-use-shl(i32 %x, i32 %y, i32 %z, i32* %p) {
+; CHECK-LABEL: @shl-and-negC-extra-use-shl(
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[SHL]], [[Z:%.*]]
+; CHECK-NEXT:    store i32 [[XOR]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i32 [[SHL]], 7
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %shl = shl i32 %x, %y
+  %xor = xor i32 %shl, %z ; extra use of shl
+  store i32 %xor, i32* %p
+  %and = and i32 %shl, 4294967288 ; ~7
+  %r = icmp ne i32 %and, 0
+  ret i1 %r
+}
+
+define i1 @shl-and-negC-extra-use-and(i32 %x, i32 %y, i32 %z, i32* %p) {
+; CHECK-LABEL: @shl-and-negC-extra-use-and(
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], -8
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[AND]], [[Z:%.*]]
+; CHECK-NEXT:    store i32 [[MUL]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %shl = shl i32 %x, %y
+  %and = and i32 %shl, 4294967288 ; ~7
+  %mul = mul i32 %and, %z ; extra use of and
+  store i32 %mul, i32* %p
+  %r = icmp ne i32 %and, 0
+  ret i1 %r
+}
+
+define i1 @shl-and-negC-extra-use-shl-and(i32 %x, i32 %y, i32 %z, i32* %p) {
+; CHECK-LABEL: @shl-and-negC-extra-use-shl-and(
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], -8
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[AND]], [[Z:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SHL]], [[XOR]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %shl = shl i32 %x, %y
+  %and = and i32 %shl, 4294967288 ; ~7
+  %xor = 
xor i32 %and, %z ; extra use of and + %add = add i32 %shl, %xor ; extra use of shl + store i32 %add, i32* %p + %r = icmp ne i32 %and, 0 + ret i1 %r +} Index: test/Transforms/InstCombine/icmp-shift-and-signbit.ll =================================================================== --- test/Transforms/InstCombine/icmp-shift-and-signbit.ll +++ test/Transforms/InstCombine/icmp-shift-and-signbit.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt %s -instcombine -S | FileCheck %s -; FIXME: expect ((X << Y) & signbit) ==/!= 0 -> (X << Y) >=/< 0 +; Checking: +; ((X << Y) & signbit) ==/!= 0 -> (X << Y) >=/< 0 +; ((X l>> Y) & signbit) ==/!= 0 -> (X l>> Y) >=/< 0 + define i1 @shl-and-signbit(i32 %x, i32 %y) { ; CHECK-LABEL: @shl-and-signbit( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 -2147483648, [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp slt i32 [[SHL]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %shl = shl i32 %x, %y @@ -15,12 +17,10 @@ ret i1 %r } -; FIXME: expect ((X l>> Y) & signbit) ==/!= 0 -> (X l>> Y) >=/< 0 define i1 @lshr-and-signbit(i32 %x, i32 %y) { ; CHECK-LABEL: @lshr-and-signbit( -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 -2147483648, [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[LSHR]], -1 ; CHECK-NEXT: ret i1 [[R]] ; %lshr = lshr i32 %x, %y @@ -28,3 +28,106 @@ %r = icmp eq i32 %and, 0 ret i1 %r } + +define <2 x i1> @lshr-and-signbit-vec(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @lshr-and-signbit-vec( +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i32> [[LSHR]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %lshr = lshr 
<2 x i32> %x, %y
+  %and = and <2 x i32> %lshr, <i32 -2147483648, i32 -2147483648>
+  %r = icmp ne <2 x i32> %and, <i32 0, i32 0>
+  ret <2 x i1> %r
+}
+
+define <3 x i1> @lshr-and-signbit-vec-undef1(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @lshr-and-signbit-vec-undef1(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and <3 x i32> [[LSHR]], <i32 -2147483648, i32 -2147483648, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <3 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[R]]
+;
+  %lshr = lshr <3 x i32> %x, %y
+  %and = and <3 x i32> %lshr, <i32 -2147483648, i32 -2147483648, i32 undef>
+  %r = icmp ne <3 x i32> %and, <i32 0, i32 0, i32 0>
+  ret <3 x i1> %r
+}
+
+define <3 x i1> @lshr-and-signbit-vec-undef2(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @lshr-and-signbit-vec-undef2(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and <3 x i32> [[LSHR]], <i32 -2147483648, i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <3 x i32> [[AND]], <i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    ret <3 x i1> [[R]]
+;
+  %lshr = lshr <3 x i32> %x, %y
+  %and = and <3 x i32> %lshr, <i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %r = icmp eq <3 x i32> %and, <i32 0, i32 0, i32 undef>
+  ret <3 x i1> %r
+}
+
+define <3 x i1> @lshr-and-signbit-vec-undef3(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @lshr-and-signbit-vec-undef3(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and <3 x i32> [[LSHR]], <i32 -2147483648, i32 -2147483648, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <3 x i32> [[AND]], <i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    ret <3 x i1> [[R]]
+;
+  %lshr = lshr <3 x i32> %x, %y
+  %and = and <3 x i32> %lshr, <i32 -2147483648, i32 -2147483648, i32 undef>
+  %r = icmp ne <3 x i32> %and, <i32 0, i32 0, i32 undef>
+  ret <3 x i1> %r
+}
+
+define i1 @shl-and-signbit-extra-use-shl(i32 %x, i32 %y, i32 %z, i32* %p) {
+; CHECK-LABEL: @shl-and-signbit-extra-use-shl(
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[SHL]], [[Z:%.*]]
+; CHECK-NEXT:    store i32 [[XOR]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i32 [[SHL]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %shl = shl i32 %x, %y
+  %xor = xor i32 %shl, %z ; extra use of shl
+  store i32 %xor, i32* %p
+  %and = and i32 %shl, -2147483648
+  %r = icmp ne i32 %and, 0
+  
ret i1 %r +} + +define i1 @shl-and-signbit-extra-use-and(i32 %x, i32 %y, i32 %z, i32* %p) { +; CHECK-LABEL: @shl-and-signbit-extra-use-and( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], -2147483648 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[AND]], [[Z:%.*]] +; CHECK-NEXT: store i32 [[MUL]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %shl = shl i32 %x, %y + %and = and i32 %shl, -2147483648 + %mul = mul i32 %and, %z ; extra use of and + store i32 %mul, i32* %p + %r = icmp ne i32 %and, 0 + ret i1 %r +} + +define i1 @shl-and-signbit-extra-use-shl-and(i32 %x, i32 %y, i32 %z, i32* %p) { +; CHECK-LABEL: @shl-and-signbit-extra-use-shl-and( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], -2147483648 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[AND]], [[Z:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SHL]], [[XOR]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %shl = shl i32 %x, %y + %and = and i32 %shl, -2147483648 + %xor = xor i32 %and, %z ; extra use of and + %add = add i32 %shl, %xor ; extra use of shl + store i32 %add, i32* %p + %r = icmp ne i32 %and, 0 + ret i1 %r +}