diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -154,6 +154,29 @@
   if (Depth == 0 && !V->hasOneUse())
     DemandedMask.setAllBits();
 
+  // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care
+  // about the high bits of the operands.
+  auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) {
+    unsigned NLZ = DemandedMask.countLeadingZeros();
+    // Right fill the mask of bits for the operands to demand the most
+    // significant bit and all those below it.
+    DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
+    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
+        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
+        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
+        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
+      if (NLZ > 0) {
+        // Disable the nsw and nuw flags here: We can no longer guarantee that
+        // we won't wrap after simplification. Removing the nsw/nuw flags is
+        // legal here because the top bit is not demanded.
+        I->setHasNoSignedWrap(false);
+        I->setHasNoUnsignedWrap(false);
+      }
+      return true;
+    }
+    return false;
+  };
+
   switch (I->getOpcode()) {
   default:
     computeKnownBits(I, Known, Depth, CxtI);
@@ -507,26 +530,9 @@
     }
     LLVM_FALLTHROUGH;
   case Instruction::Sub: {
-    /// If the high-bits of an ADD/SUB are not demanded, then we do not care
-    /// about the high bits of the operands.
-    unsigned NLZ = DemandedMask.countLeadingZeros();
-    // Right fill the mask of bits for this ADD/SUB to demand the most
-    // significant bit and all those below it.
-    APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
-    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
-        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
-        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
-        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
-      if (NLZ > 0) {
-        // Disable the nsw and nuw flags here: We can no longer guarantee that
-        // we won't wrap after simplification. Removing the nsw/nuw flags is
-        // legal here because the top bit is not demanded.
-        BinaryOperator &BinOP = *cast<BinaryOperator>(I);
-        BinOP.setHasNoSignedWrap(false);
-        BinOP.setHasNoUnsignedWrap(false);
-      }
+    APInt DemandedFromOps;
+    if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
       return I;
-    }
 
     // If we are known to be adding/subtracting zeros to every bit below
     // the highest demanded bit, we just return the other side.
@@ -545,6 +551,10 @@
     break;
   }
   case Instruction::Mul: {
+    APInt DemandedFromOps;
+    if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
+      return I;
+
     if (DemandedMask.isPowerOf2()) {
       // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
       // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
diff --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll
--- a/llvm/test/Transforms/InstCombine/and-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-or.ll
@@ -290,7 +290,7 @@
 
 define i8 @and_xor_hoist_mask_commute(i8 %a, i8 %b) {
 ; CHECK-LABEL: @and_xor_hoist_mask_commute(
-; CHECK-NEXT:    [[C:%.*]] = mul i8 [[B:%.*]], 43
+; CHECK-NEXT:    [[C:%.*]] = mul i8 [[B:%.*]], 3
 ; CHECK-NEXT:    [[SH:%.*]] = lshr i8 [[A:%.*]], 6
 ; CHECK-NEXT:    [[C_MASKED:%.*]] = and i8 [[C]], 3
 ; CHECK-NEXT:    [[AND:%.*]] = xor i8 [[C_MASKED]], [[SH]]
@@ -305,7 +305,7 @@
 
 define <2 x i8> @and_or_hoist_mask_commute_vec_splat(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @and_or_hoist_mask_commute_vec_splat(
-; CHECK-NEXT:    [[C:%.*]] = mul <2 x i8> [[B:%.*]], <i8 43, i8 43>
+; CHECK-NEXT:    [[C:%.*]] = mul <2 x i8> [[B:%.*]], <i8 3, i8 3>
 ; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], <i8 6, i8 6>
 ; CHECK-NEXT:    [[C_MASKED:%.*]] = and <2 x i8> [[C]], <i8 3, i8 3>
 ; CHECK-NEXT:    [[AND:%.*]] = or <2 x i8> [[C_MASKED]], [[SH]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
--- a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
@@ -37,8 +37,8 @@
 
 define i1 @mul_mask_pow2_ne0_use2(i8 %x) {
 ; CHECK-LABEL: @mul_mask_pow2_ne0_use2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[TMP1]], 8
+; CHECK-NEXT:    [[MUL:%.*]] = shl i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 8
 ; CHECK-NEXT:    call void @use(i8 [[AND]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -96,7 +96,7 @@
 
 define i1 @mul_mask_notpow2_ne(i8 %x) {
 ; CHECK-LABEL: @mul_mask_notpow2_ne(
-; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 60
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 12
 ; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 12
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -121,7 +121,7 @@
 
 define i1 @pr40493_neg1(i32 %area) {
 ; CHECK-LABEL: @pr40493_neg1(
-; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[AREA:%.*]], 11
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[AREA:%.*]], 3
 ; CHECK-NEXT:    [[REM:%.*]] = and i32 [[MUL]], 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[REM]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -147,8 +147,8 @@
 
 define i32 @pr40493_neg3(i32 %area) {
 ; CHECK-LABEL: @pr40493_neg3(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[AREA:%.*]], 2
-; CHECK-NEXT:    [[REM:%.*]] = and i32 [[TMP1]], 4
+; CHECK-NEXT:    [[MUL:%.*]] = shl i32 [[AREA:%.*]], 2
+; CHECK-NEXT:    [[REM:%.*]] = and i32 [[MUL]], 4
 ; CHECK-NEXT:    ret i32 [[REM]]
 ;
   %mul = mul i32 %area, 12
@@ -222,10 +222,7 @@
 
 define i1 @pr51551(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551(
-; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], -8
-; CHECK-NEXT:    [[T1:%.*]] = or i32 [[T0]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[MUL]], 3
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 3
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -239,10 +236,7 @@
 
 define i1 @pr51551_2(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551_2(
-; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], -8
-; CHECK-NEXT:    [[T1:%.*]] = or i32 [[T0]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[MUL]], 1
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -256,9 +250,9 @@
 
 define i1 @pr51551_neg1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551_neg1(
-; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], -4
+; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], 4
 ; CHECK-NEXT:    [[T1:%.*]] = or i32 [[T0]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[T1]], [[X:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[MUL]], 7
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -273,8 +267,8 @@
 
 define i1 @pr51551_neg2(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551_neg2(
-; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], -7
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[T0]], [[X:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], 1
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i32 [[T0]], [[X:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[MUL]], 7
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -288,10 +282,7 @@
 
 define i32 @pr51551_demand3bits(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551_demand3bits(
-; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], -8
-; CHECK-NEXT:    [[T1:%.*]] = or i32 [[T0]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[MUL]], 7
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 7
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %t0 = and i32 %y, -7
diff --git a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
--- a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
@@ -1073,7 +1073,7 @@
 
 define i32 @mulmuladd2(i32 %a0, i32 %a1) {
 ; CHECK-LABEL: @mulmuladd2(
-; CHECK-NEXT:    [[ADD_NEG:%.*]] = sub i32 -16, [[A0:%.*]]
+; CHECK-NEXT:    [[ADD_NEG:%.*]] = sub i32 1073741808, [[A0:%.*]]
 ; CHECK-NEXT:    [[MUL1_NEG:%.*]] = mul i32 [[ADD_NEG]], [[A1:%.*]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = shl i32 [[MUL1_NEG]], 2
 ; CHECK-NEXT:    ret i32 [[MUL2]]
diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -1134,7 +1134,7 @@
 
 define i32 @mulmuladd2(i32 %a0, i32 %a1) {
 ; CHECK-LABEL: @mulmuladd2(
-; CHECK-NEXT:    [[ADD_NEG:%.*]] = sub i32 -16, [[A0:%.*]]
+; CHECK-NEXT:    [[ADD_NEG:%.*]] = sub i32 1073741808, [[A0:%.*]]
 ; CHECK-NEXT:    [[MUL1_NEG:%.*]] = mul i32 [[ADD_NEG]], [[A1:%.*]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = shl i32 [[MUL1_NEG]], 2
 ; CHECK-NEXT:    ret i32 [[MUL2]]
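
Illustration (not part of the patch; the function name and constants below are invented): the new simplifyOperandsBasedOnUnusedHighBits path lets instcombine shrink a mul operand whose high bits are never observed. In the sketch below, the trailing `and` demands only the two low bits of the product, so the multiplier 13 can be shrunk to 13 & 3 = 1 and the nsw flag must be dropped; the sequence should then fold to a plain `and i32 %x, 3`, mirroring the pr51551 tests above.

; A sketch to run through `opt -passes=instcombine -S`, not a test from this patch.
define i32 @demanded_low_bits_of_mul(i32 %x) {
  %mul = mul nsw i32 %x, 13   ; only bits 0-1 of %mul are demanded below
  %and = and i32 %mul, 3      ; expected (illustrative) result: %and = and i32 %x, 3
  ret i32 %and
}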