Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -772,13 +772,13 @@ // If we are just checking for a icmp eq of a single bit and zext'ing it // to an integer, then shift the bit to the appropriate place and then // cast to integer to avoid the comparison. - if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) { - const APInt &Op1CV = Op1C->getValue(); + const APInt *Op1CV; + if (match(ICI->getOperand(1), m_APInt(Op1CV))) { // zext (x <s 0) to i32 --> x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. - if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV.isNullValue()) || - (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { + if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) || + (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) { if (!DoTransform) return ICI; Value *In = ICI->getOperand(0); @@ -804,7 +804,7 @@ // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - if ((Op1CV.isNullValue() || Op1CV.isPowerOf2()) && + if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) && // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: @@ -815,7 +815,7 @@ if (!DoTransform) return ICI; bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; - if (!Op1CV.isNullValue() && (Op1CV != KnownZeroMask)) { + if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) { // (X&4) == 2 --> false // (X&4) != 2 --> true Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()), @@ -833,7 +833,7 @@ In->getName() + ".lobit"); } - if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit. 
+ if (!Op1CV->isNullValue() == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); In = Builder.CreateXor(In, One); } Index: llvm/trunk/test/Transforms/InstCombine/compare-signs.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/compare-signs.ll +++ llvm/trunk/test/Transforms/InstCombine/compare-signs.ll @@ -48,6 +48,22 @@ ret i32 %t3 } +; TODO this should optimize but doesn't due to missing vector support in InstCombiner::foldICmpEquality. +define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone { +; CHECK-LABEL: @test3vec( +; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 31> +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 31, i32 31> +; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[T3]] +; + %t0 = lshr <2 x i32> %a, <i32 31, i32 31> + %t1 = lshr <2 x i32> %b, <i32 31, i32 31> + %t2 = icmp eq <2 x i32> %t0, %t1 + %t3 = zext <2 x i1> %t2 to <2 x i32> + ret <2 x i32> %t3 +} + ; Variation on @test3: checking the 2nd bit in a situation where the 5th bit ; is one, not zero. 
define i32 @test3i(i32 %a, i32 %b) nounwind readnone { Index: llvm/trunk/test/Transforms/InstCombine/icmp.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/icmp.ll +++ llvm/trunk/test/Transforms/InstCombine/icmp.ll @@ -13,6 +13,16 @@ ret i32 %b } +define <2 x i32> @test1vec(<2 x i32> %X) { +; CHECK-LABEL: @test1vec( +; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31> +; CHECK-NEXT: ret <2 x i32> [[X_LOBIT]] +; + %a = icmp slt <2 x i32> %X, zeroinitializer + %b = zext <2 x i1> %a to <2 x i32> + ret <2 x i32> %b +} + define i32 @test2(i32 %X) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 %X, 31 @@ -24,6 +34,17 @@ ret i32 %b } +define <2 x i32> @test2vec(<2 x i32> %X) { +; CHECK-LABEL: @test2vec( +; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31> +; CHECK-NEXT: [[X_LOBIT_NOT:%.*]] = xor <2 x i32> [[X_LOBIT]], <i32 1, i32 1> +; CHECK-NEXT: ret <2 x i32> [[X_LOBIT_NOT]] +; + %a = icmp ult <2 x i32> %X, <i32 -2147483648, i32 -2147483648> + %b = zext <2 x i1> %a to <2 x i32> + ret <2 x i32> %b +} + define i32 @test3(i32 %X) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 %X, 31 Index: llvm/trunk/test/Transforms/InstCombine/set.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/set.ll +++ llvm/trunk/test/Transforms/InstCombine/set.ll @@ -282,6 +282,17 @@ ret i32 %D } +define <2 x i32> @test20vec(<2 x i32> %A) { +; CHECK-LABEL: @test20vec( +; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1> +; CHECK-NEXT: ret <2 x i32> [[B]] +; + %B = and <2 x i32> %A, <i32 1, i32 1> + %C = icmp ne <2 x i32> %B, zeroinitializer + %D = zext <2 x i1> %C to <2 x i32> + ret <2 x i32> %D +} + define i32 @test21(i32 %a) { ; CHECK-LABEL: @test21( ; CHECK-NEXT: [[TMP_6:%.*]] = lshr i32 %a, 2 @@ -294,6 +305,18 @@ ret i32 %retval } +define <2 x i32> @test21vec(<2 x i32> %a) { +; CHECK-LABEL: @test21vec( +; CHECK-NEXT: [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2> +; CHECK-NEXT: 
[[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], <i32 1, i32 1> +; CHECK-NEXT: ret <2 x i32> [[TMP_6_LOBIT]] +; + %tmp.6 = and <2 x i32> %a, <i32 4, i32 4> + %not.tmp.7 = icmp ne <2 x i32> %tmp.6, zeroinitializer + %retval = zext <2 x i1> %not.tmp.7 to <2 x i32> + ret <2 x i32> %retval +} + define i1 @test22(i32 %A, i32 %X) { ; CHECK-LABEL: @test22( ; CHECK-NEXT: ret i1 true @@ -318,6 +341,18 @@ ret i32 %tmp.3 } +define <2 x i32> @test23vec(<2 x i32> %a) { +; CHECK-LABEL: @test23vec( +; CHECK-NEXT: [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1> +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[TMP_1]], <i32 1, i32 1> +; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; + %tmp.1 = and <2 x i32> %a, <i32 1, i32 1> + %tmp.2 = icmp eq <2 x i32> %tmp.1, zeroinitializer + %tmp.3 = zext <2 x i1> %tmp.2 to <2 x i32> + ret <2 x i32> %tmp.3 +} + define i32 @test24(i32 %a) { ; CHECK-LABEL: @test24( ; CHECK-NEXT: [[TMP_1:%.*]] = lshr i32 %a, 2 @@ -332,6 +367,20 @@ ret i32 %tmp.3 } +define <2 x i32> @test24vec(<2 x i32> %a) { +; CHECK-LABEL: @test24vec( +; CHECK-NEXT: [[TMP_1:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2> +; CHECK-NEXT: [[TMP_1_LOBIT:%.*]] = and <2 x i32> [[TMP_1]], <i32 1, i32 1> +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[TMP_1_LOBIT]], <i32 1, i32 1> +; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; + %tmp1 = and <2 x i32> %a, <i32 4, i32 4> + %tmp.1 = lshr <2 x i32> %tmp1, <i32 2, i32 2> + %tmp.2 = icmp eq <2 x i32> %tmp.1, zeroinitializer + %tmp.3 = zext <2 x i1> %tmp.2 to <2 x i32> + ret <2 x i32> %tmp.3 +} + define i1 @test25(i32 %A) { ; CHECK-LABEL: @test25( ; CHECK-NEXT: ret i1 false