diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1295,6 +1295,48 @@ return new ICmpInst(Pred, X, Cmp.getOperand(1)); } + // (icmp eq/ne (mul X Y)) -> (icmp eq/ne X/Y) if we know about whether X/Y are + // odd/non-zero/there is no overflow. + if (match(Cmp.getOperand(0), m_Mul(m_Value(X), m_Value(Y))) && + ICmpInst::isEquality(Pred)) { + + KnownBits XKnown = computeKnownBits(X, 0, &Cmp); + // if X % 2 != 0 + // (icmp eq/ne Y) + if (XKnown.countMaxTrailingZeros() == 0) + return new ICmpInst(Pred, Y, Cmp.getOperand(1)); + + KnownBits YKnown = computeKnownBits(Y, 0, &Cmp); + // if Y % 2 != 0 + // (icmp eq/ne X) + if (YKnown.countMaxTrailingZeros() == 0) + return new ICmpInst(Pred, X, Cmp.getOperand(1)); + + auto *BO0 = cast<OverflowingBinaryOperator>(Cmp.getOperand(0)); + if (BO0->hasNoUnsignedWrap() || BO0->hasNoSignedWrap()) { + const SimplifyQuery Q = SQ.getWithInstruction(&Cmp); + // `isKnownNonZero` does more analysis than just `!KnownBits.One.isZero()` + // but to avoid unnecessary work, first check the obvious case. + + // if X non-zero and NoOverflow(X * Y) + // (icmp eq/ne Y) + if (!XKnown.One.isZero() || isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT)) + return new ICmpInst(Pred, Y, Cmp.getOperand(1)); + + // if Y non-zero and NoOverflow(X * Y) + // (icmp eq/ne X) + if (!YKnown.One.isZero() || isKnownNonZero(Y, DL, 0, Q.AC, Q.CxtI, Q.DT)) + return new ICmpInst(Pred, X, Cmp.getOperand(1)); + } + // Note, we are skipping cases: + // if Y % 2 != 0 AND X % 2 != 0 + // (false/true) + // if X non-zero and Y non-zero and NoOverflow(X * Y) + // (false/true) + // Those can be simplified later as we would have already replaced the (icmp + // eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that + // will fold to a constant elsewhere. 
+ } return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/icmp-binop.ll b/llvm/test/Transforms/InstCombine/icmp-binop.ll --- a/llvm/test/Transforms/InstCombine/icmp-binop.ll +++ b/llvm/test/Transforms/InstCombine/icmp-binop.ll @@ -6,8 +6,7 @@ define i1 @mul_unkV_oddC_eq(i32 %v) { ; CHECK-LABEL: @mul_unkV_oddC_eq( -; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[V:%.*]], 3 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %mul = mul i32 %v, 3 @@ -28,8 +27,7 @@ define <2 x i1> @mul_unkV_oddC_ne_vec(<2 x i64> %v) { ; CHECK-LABEL: @mul_unkV_oddC_ne_vec( -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i64> [[V:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i64> [[MUL]], zeroinitializer +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i64> [[V:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %mul = mul <2 x i64> %v, @@ -72,7 +70,7 @@ define i1 @mul_reused_unkV_oddC_ne(i64 %v) { ; CHECK-LABEL: @mul_reused_unkV_oddC_ne( ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[V:%.*]], 3 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[V]], 0 ; CHECK-NEXT: call void @use64(i64 [[MUL]]) ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -87,8 +85,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i16 [[V2:%.*]], 1 ; CHECK-NEXT: [[ODD:%.*]] = icmp ne i16 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[ODD]]) -; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[V:%.*]], [[V2]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[V:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %lb = and i16 %v2, 1 @@ -105,7 +102,7 @@ ; CHECK-NEXT: [[ODD:%.*]] = icmp ne i64 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[ODD]]) ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[V]], [[V2:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[V2]], 0 ; CHECK-NEXT: call void @use64(i64 [[MUL]]) ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -120,9 +117,7 @@ define <2 x 
i1> @mul_setoddV_unkV_ne(<2 x i32> %v1, <2 x i32> %v2) { ; CHECK-LABEL: @mul_setoddV_unkV_ne( -; CHECK-NEXT: [[V:%.*]] = or <2 x i32> [[V1:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[V]], [[V2:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[MUL]], zeroinitializer +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V2:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %v = or <2 x i32> %v1, @@ -190,8 +185,7 @@ ; CHECK-LABEL: @mul_assumenzV_unkV_nsw_ne( ; CHECK-NEXT: [[NZ:%.*]] = icmp ne i32 [[V:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[NZ]]) -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[V]], [[V2:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[V2:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %nz = icmp ne i32 %v, 0 @@ -229,9 +223,7 @@ define i1 @mul_setnzV_unkV_nuw_eq(i8 %v1, i8 %v2) { ; CHECK-LABEL: @mul_setnzV_unkV_nuw_eq( -; CHECK-NEXT: [[V:%.*]] = or i8 [[V1:%.*]], 2 -; CHECK-NEXT: [[MUL:%.*]] = mul nuw i8 [[V]], [[V2:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[V2:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %v = or i8 %v1, 2 @@ -245,8 +237,7 @@ ; CHECK-NEXT: [[NZ_NOT:%.*]] = icmp eq i64 [[V2:%.*]], 0 ; CHECK-NEXT: br i1 [[NZ_NOT]], label [[FALSE:%.*]], label [[TRUE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[MUL:%.*]] = mul nuw i64 [[V:%.*]], [[V2]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[V:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; CHECK: false: ; CHECK-NEXT: call void @use64(i64 [[V]]) diff --git a/llvm/test/Transforms/InstCombine/pr38677.ll b/llvm/test/Transforms/InstCombine/pr38677.ll --- a/llvm/test/Transforms/InstCombine/pr38677.ll +++ b/llvm/test/Transforms/InstCombine/pr38677.ll @@ -12,9 +12,7 @@ ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: ; CHECK-NEXT: [[USE2:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ select (i1 icmp eq (ptr @A, ptr @B), i32 2, i32 1), [[DELAY]] ] -; CHECK-NEXT: 
[[B7:%.*]] = mul i32 [[USE2]], 2147483647 -; CHECK-NEXT: [[C3:%.*]] = icmp eq i32 [[B7]], 0 -; CHECK-NEXT: store i1 [[C3]], ptr [[DST:%.*]], align 1 +; CHECK-NEXT: store i1 false, ptr [[DST:%.*]], align 1 ; CHECK-NEXT: ret i32 [[USE2]] ; entry: