diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4607,12 +4607,17 @@
 
   // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
   // For lshr and ashr pairs.
-  if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
-       match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
-      (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
-       match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
-    unsigned TypeBits = Cst1->getBitWidth();
-    unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+  // Match the shift amounts as APInt so that splat vector constants are
+  // accepted as well as scalars. The two amounts are compared by value, and
+  // a mismatch only skips this fold instead of returning from the function.
+  const APInt *AP1, *AP2;
+  if (((match(Op0, m_OneUse(m_LShr(m_Value(A), m_APInt(AP1)))) &&
+        match(Op1, m_OneUse(m_LShr(m_Value(B), m_APInt(AP2))))) ||
+       (match(Op0, m_OneUse(m_AShr(m_Value(A), m_APInt(AP1)))) &&
+        match(Op1, m_OneUse(m_AShr(m_Value(B), m_APInt(AP2)))))) &&
+      *AP1 == *AP2) {
+    unsigned TypeBits = AP1->getBitWidth();
+    unsigned ShAmt = AP1->getLimitedValue(TypeBits);
     if (ShAmt < TypeBits && ShAmt != 0) {
       ICmpInst::Predicate NewPred =
           Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
diff --git a/llvm/test/Transforms/InstCombine/compare-signs.ll b/llvm/test/Transforms/InstCombine/compare-signs.ll
--- a/llvm/test/Transforms/InstCombine/compare-signs.ll
+++ b/llvm/test/Transforms/InstCombine/compare-signs.ll
@@ -47,17 +47,32 @@
   ret i32 %t3
 }
 
-; TODO this should optimize but doesn't due to missing vector support in InstCombiner::foldICmpEquality.
 define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
 ; CHECK-LABEL: @test3vec(
+; CHECK-NEXT:    [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = icmp sgt <2 x i32> [[T2_UNSHIFTED]],
+; CHECK-NEXT:    [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[T3]]
+;
+  %t0 = lshr <2 x i32> %a,
+  %t1 = lshr <2 x i32> %b,
+  %t2 = icmp eq <2 x i32> %t0, %t1
+  %t3 = zext <2 x i1> %t2 to <2 x i32>
+  ret <2 x i32> %t3
+}
+
+; negative test
+
+define <2 x i32> @test4vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
+; CHECK-LABEL: @test4vec(
 ; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[A:%.*]],
-; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i32> [[B:%.*]],
+; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i32> [[B:%.*]],
 ; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]]
 ; CHECK-NEXT:    [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
   %t0 = lshr <2 x i32> %a,
-  %t1 = lshr <2 x i32> %b,
+  %t1 = lshr <2 x i32> %b,
   %t2 = icmp eq <2 x i32> %t0, %t1
   %t3 = zext <2 x i1> %t2 to <2 x i32>
   ret <2 x i32> %t3