Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1894,11 +1894,12 @@ return new ICmpInst(NewPred, X, SubOne(cast(Cmp.getOperand(1)))); } - // (X & C2) == 0 -> (trunc X) >= 0 - // (X & C2) != 0 -> (trunc X) < 0 - // iff C2 is a power of 2 and it masks the sign bit of a legal integer type. const APInt *C2; if (And->hasOneUse() && C.isZero() && match(Y, m_APInt(C2))) { + // (X & C2) == 0 -> (trunc X) >= 0 + // (X & C2) != 0 -> (trunc X) < 0 + // iff C2 is a power of 2 and it masks the sign bit of a legal integer + // type. int32_t ExactLogBase2 = C2->exactLogBase2(); if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) { Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); @@ -1909,6 +1910,45 @@ Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT; return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy)); } + + const APInt *C1; + Value *X1; + // ((X1 * C1) & C2) ==/!= 0 -> ((X1 * (C1/C2)) & 1) ==/!= 0 + // iff C2 is a power of 2 greater than 1 and C1 is a multiple of C2 + if (match(X, m_Mul(m_Value(X1), m_APInt(C1))) && C2->isPowerOf2() && + !C2->isOne() && (C1->urem(*C2)).isZero()) { + const auto NewDiv = C1->udiv(*C2); + auto *NewMul = + Builder.CreateMul(X1, ConstantInt::get(X1->getType(), NewDiv)); + auto *NewAnd = + Builder.CreateAnd(NewMul, ConstantInt::get(NewMul->getType(), 1)); + return new ICmpInst(Cmp.getPredicate(), NewAnd, Cmp.getOperand(1)); + } + + Value *X2; + // ((X1 * X2) & C2) ==/!= 0 -> (X1 & C2) ==/!= 0 + // iff (C2 + 1) is a power of 2 and (X2 & C2) == 1 + if (match(X, m_Mul(m_Value(X1), m_Value(X2))) && C2->isMask()) { + auto TrailingOnes = C2->countTrailingOnes(); + auto ExpectedZero = + C2->usub_sat(APInt::getOneBitSet(C2->getBitWidth(), 0)); + + auto Check = [&](Value *Xi, Value *Xj) -> Instruction * { + KnownBits XiKnown = computeKnownBits(Xi, 0, &Cmp); + if (XiKnown.One.getLoBits(1).isOne() && + XiKnown.Zero.getLoBits(TrailingOnes) == ExpectedZero) { + auto *NewAnd = + Builder.CreateAnd(Xj, ConstantInt::get(Xj->getType(), *C2)); + return new ICmpInst(Cmp.getPredicate(), NewAnd, Cmp.getOperand(1)); + } + return nullptr; + }; + + if (Instruction *I = Check(X1, X2)) + return I; + if (Instruction *I = Check(X2, X1)) + return I; + } } return nullptr; Index: llvm/test/Transforms/InstCombine/pr40493.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/pr40493.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Test that the optimization described in PR#40493 is performed +define i1 @test1(i32 %area) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[AREA:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %mul = mul i32 %area, 12 + %rem = and i32 %mul, 4 + %cmp = icmp eq i32 %rem, 0 + ret i1 %cmp +} + +define i1 @test1_neg1(i32 %area) { +; CHECK-LABEL: @test1_neg1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[AREA:%.*]], 11 +; CHECK-NEXT: [[REM:%.*]] = and i32 [[MUL]], 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %mul = mul i32 %area, 11 + %rem = and i32 %mul, 4 + %cmp = icmp eq i32 %rem, 0 + ret i1 %cmp +} + +define i1 @test1_neg2(i32 %area) { +; CHECK-LABEL: @test1_neg2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[AREA:%.*]], 12 +; CHECK-NEXT: [[REM:%.*]] = and i32 [[MUL]], 12 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %mul = mul i32 %area, 12 + %rem = and i32 %mul, 15 + %cmp = icmp eq i32 %rem, 0 + ret i1 %cmp +} + +define i32 @test1_neg3(i32 %area) { +; CHECK-LABEL: @test1_neg3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[AREA:%.*]], 12 +; CHECK-NEXT: [[REM:%.*]] = and i32 [[MUL]], 4 +; CHECK-NEXT: ret i32 [[REM]] +; +entry: + %mul = mul i32 %area, 12 + %rem = and i32 %mul, 4 + ret i32 %rem +} + +; Test that the optimization described in PR#51551 is performed +define i1 @test2(i32 %x, i32 %y) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[X:%.*]], 3 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %0 = and i32 %y, -7 + %1 = or i32 %0, 1 + %mul = mul nsw i32 %1, %x + %and = and i32 %mul, 3 + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} + +define i1 @test2_2(i32 %x, i32 %y) { +; CHECK-LABEL: @test2_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %0 = and i32 %y, -7 + %1 = or i32 %0, 1 + %mul = mul nsw i32 %1, %x + %and = and i32 %mul, 1 + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} + +define i1 @test2_neg1(i32 %x, i32 %y) { +; CHECK-LABEL: @test2_neg1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[Y:%.*]], -4 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], 1 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], 7 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %0 = and i32 %y, -3 + %1 = or i32 %0, 1 + %mul = mul nsw i32 %1, %x + %and = and i32 %mul, 7 + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} + +define i1 @test2_neg2(i32 %x, i32 %y) { +; CHECK-LABEL: @test2_neg2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[Y:%.*]], -7 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], [[X:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], 7 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %0 = and i32 %y, -7 + %mul = mul nsw i32 %0, %x + %and = and i32 %mul, 7 + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} + +define i32 @test2_neg3(i32 %x, i32 %y) { +; CHECK-LABEL: @test2_neg3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[Y:%.*]], -8 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], 1 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], 7 +; CHECK-NEXT: ret i32 [[AND]] +; +entry: + %0 = and i32 %y, -7 + %1 = or i32 %0, 1 + %mul = mul nsw i32 %1, %x + %and = and i32 %mul, 7 + ret i32 %and +}