diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -549,6 +549,18 @@ SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1)) return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); + // If low order bits are not demanded and are known to be zero in RHS, + // then we don't need to demand them from LHS, since they can't cause a + // borrow from any bits that are demanded in the result. + // The new LHS only needs to match in the bits still demanded, so reuse the + // wrap-flag handling that the simplification above applies. + unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countTrailingOnes(); + APInt DemandedFromLHS = DemandedFromOps; + DemandedFromLHS.clearLowBits(NTZ); + if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) || + SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Depth + 1)) + return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); + // If we are known to be subtracting zeros from every bit below // the highest demanded bit, we just return the other side. 
if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -2120,12 +2120,12 @@ ret i8 %r } +; sub becomes negate and combines with shl + define i8 @shrink_sub_from_constant_lowbits(i8 %x) { ; CHECK-LABEL: @shrink_sub_from_constant_lowbits( -; CHECK-NEXT: [[X000:%.*]] = shl i8 [[X:%.*]], 3 -; CHECK-NEXT: [[SUB:%.*]] = sub i8 7, [[X000]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[SUB]], -8 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: [[X000_NEG:%.*]] = mul i8 [[X:%.*]], -8 +; CHECK-NEXT: ret i8 [[X000_NEG]] ; %x000 = shl i8 %x, 3 ; 3 low bits are known zero %sub = sub i8 7, %x000 @@ -2133,6 +2133,8 @@ ret i8 %r } +; negative test - extra use prevents shrinking '7' + define i8 @shrink_sub_from_constant_lowbits_uses(i8 %x) { ; CHECK-LABEL: @shrink_sub_from_constant_lowbits_uses( ; CHECK-NEXT: [[X000:%.*]] = shl i8 [[X:%.*]], 3 @@ -2148,10 +2150,12 @@ ret i8 %r } +; safe to clear 3 low bits (2 higher bits remain set) + define i8 @shrink_sub_from_constant_lowbits2(i8 %x) { ; CHECK-LABEL: @shrink_sub_from_constant_lowbits2( ; CHECK-NEXT: [[X000:%.*]] = and i8 [[X:%.*]], -8 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i8 30, [[X000]] +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i8 24, [[X000]] ; CHECK-NEXT: [[R:%.*]] = and i8 [[SUB]], -16 ; CHECK-NEXT: ret i8 [[R]] ; @@ -2161,11 +2165,13 @@ ret i8 %r } +; safe to clear 3 low bits (2 higher bits remain set) + define <2 x i8> @shrink_sub_from_constant_lowbits3(<2 x i8> %x) { ; CHECK-LABEL: @shrink_sub_from_constant_lowbits3( ; CHECK-NEXT: [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X0000]] -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i8> [[SUB]], +; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X0000]] +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i8> [[SUB]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x0000 = shl <2 x i8> %x, ; 4 low bits are 
known zero @@ -2174,12 +2180,14 @@ ret <2 x i8> %r } +; eliminate the mask of y or the mask of the result + define i8 @demand_sub_from_variable_lowbits(i8 %x, i8 %y) { ; CHECK-LABEL: @demand_sub_from_variable_lowbits( ; CHECK-NEXT: [[X000:%.*]] = shl i8 [[X:%.*]], 3 -; CHECK-NEXT: [[Y000:%.*]] = and i8 [[Y:%.*]], -8 -; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[Y000]], [[X000]] -; CHECK-NEXT: ret i8 [[SUB]] +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[Y:%.*]], [[X000]] +; CHECK-NEXT: [[R:%.*]] = and i8 [[SUB]], -8 +; CHECK-NEXT: ret i8 [[R]] ; %x000 = shl i8 %x, 3 ; 3 low bits are known zero %y000 = and i8 %y, -8 @@ -2188,11 +2196,12 @@ ret i8 %r } +; setting the low 3 bits of y doesn't change anything + define i8 @demand_sub_from_variable_lowbits2(i8 %x, i8 %y) { ; CHECK-LABEL: @demand_sub_from_variable_lowbits2( ; CHECK-NEXT: [[X0000:%.*]] = shl i8 [[X:%.*]], 4 -; CHECK-NEXT: [[Y111:%.*]] = or i8 [[Y:%.*]], 7 -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i8 [[Y111]], [[X0000]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i8 [[Y:%.*]], [[X0000]] ; CHECK-NEXT: [[R:%.*]] = lshr i8 [[SUB]], 4 ; CHECK-NEXT: ret i8 [[R]] ; @@ -2203,6 +2212,8 @@ ret i8 %r } +; negative test - the mask of y removes an extra bit, so that instruction is needed + define i8 @demand_sub_from_variable_lowbits3(i8 %x, i8 %y) { ; CHECK-LABEL: @demand_sub_from_variable_lowbits3( ; CHECK-NEXT: [[X0000:%.*]] = shl i8 [[X:%.*]], 4