Index: lib/Transforms/InstCombine/InstCombineShifts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineShifts.cpp +++ lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -64,6 +64,35 @@ return NewShift; } +// Given (X & (-1 << Y)) outer>> Y or ((X inner>> Y) << Y) outer>> Y, +// where inner>> and outer>> are each, independently, lshr or ashr: +// the inner operation only clears the low Y bits of X, and those are +// exactly the bits the outer right-shift by Y discards, so it is redundant. +// Returns a newly created X outer>> Y using the opcode of OuterShift (flags +// such as exact are not copied over), or nullptr if OuterShift does not have +// this form. Extra uses of the mask or of the masked value do not block it. +static Instruction * +dropRedundantMaskingOfRightShiftInput(BinaryOperator *OuterShift) { + Value *Y; + if (!match(OuterShift, m_Shr(m_Value(), m_Value(Y)))) + return nullptr; + + Value *X; + + // (-1 << Y): all-ones shifted left by the shift amount of OuterShift. + auto Mask = m_Shl(m_AllOnes(), m_Specific(Y)); + // (X & (-1 << Y)); m_c_And accepts the and operands in either order. + auto MaskVariant = m_c_And(Mask, m_Value(X)); + + // ((X inner>> Y) << Y): X shifted right, then left, by that same Y. + auto ShiftVariant = m_Shl(m_Shr(m_Value(X), m_Specific(Y)), m_Specific(Y)); + + if (!match(OuterShift->getOperand(0), m_CombineOr(MaskVariant, ShiftVariant))) + return nullptr; + + return BinaryOperator::Create(OuterShift->getOpcode(), X, Y); +} + Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); assert(Op0->getType() == Op1->getType()); @@ -86,6 +115,9 @@ reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)) return NewShift; + if (Instruction *NewShift = dropRedundantMaskingOfRightShiftInput(&I)) + return NewShift; + // (C1 shift (A add C2)) -> (C1 shift C2) shift A) // iff A and C2 are both positive. 
Value *A; Index: test/Transforms/InstCombine/redundant-shift-input-masking.ll =================================================================== --- test/Transforms/InstCombine/redundant-shift-input-masking.ll +++ test/Transforms/InstCombine/redundant-shift-input-masking.ll @@ -12,9 +12,7 @@ define i32 @t0_lshr(i32 %data, i32 %nbits) { ; CHECK-LABEL: @t0_lshr( -; CHECK-NEXT: [[T0:%.*]] = shl i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[DATA:%.*]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact i32 [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[DATA:%.*]], [[NBITS:%.*]] ; CHECK-NEXT: ret i32 [[T2]] ; %t0 = shl i32 -1, %nbits @@ -24,9 +22,7 @@ } define i32 @t1_sshr(i32 %data, i32 %nbits) { ; CHECK-LABEL: @t1_sshr( -; CHECK-NEXT: [[T0:%.*]] = shl i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[DATA:%.*]] -; CHECK-NEXT: [[T2:%.*]] = ashr exact i32 [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[DATA:%.*]], [[NBITS:%.*]] ; CHECK-NEXT: ret i32 [[T2]] ; %t0 = shl i32 -1, %nbits @@ -39,9 +35,7 @@ define <4 x i32> @t2_vec(<4 x i32> %data, <4 x i32> %nbits) { ; CHECK-LABEL: @t2_vec( -; CHECK-NEXT: [[T0:%.*]] = shl <4 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and <4 x i32> [[T0]], [[DATA:%.*]] -; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> [[DATA:%.*]], [[NBITS:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T2]] ; %t0 = shl <4 x i32> , %nbits @@ -52,9 +46,7 @@ define <4 x i32> @t3_vec_undef(<4 x i32> %data, <4 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_undef( -; CHECK-NEXT: [[T0:%.*]] = shl <4 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and <4 x i32> [[T0]], [[DATA:%.*]] -; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> [[DATA:%.*]], [[NBITS:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T2]] ; %t0 = shl <4 x i32> , %nbits @@ -71,8 +63,7 @@ ; CHECK-LABEL: @t4_extrause0( ; CHECK-NEXT: [[T0:%.*]] = shl i32 -1, [[NBITS:%.*]] 
; CHECK-NEXT: call void @use32(i32 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[DATA:%.*]] -; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[DATA:%.*]], [[NBITS]] ; CHECK-NEXT: ret i32 [[T2]] ; %t0 = shl i32 -1, %nbits @@ -87,7 +78,7 @@ ; CHECK-NEXT: [[T0:%.*]] = shl i32 -1, [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[DATA:%.*]] ; CHECK-NEXT: call void @use32(i32 [[T1]]) -; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[DATA]], [[NBITS]] ; CHECK-NEXT: ret i32 [[T2]] ; %t0 = shl i32 -1, %nbits @@ -103,7 +94,7 @@ ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = and i32 [[T0]], [[DATA:%.*]] ; CHECK-NEXT: call void @use32(i32 [[T1]]) -; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[DATA]], [[NBITS]] ; CHECK-NEXT: ret i32 [[T2]] ; %t0 = shl i32 -1, %nbits @@ -122,7 +113,7 @@ ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = shl i32 [[T0]], [[NBITS]] ; CHECK-NEXT: call void @use32(i32 [[T1]]) -; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[DATA]], [[NBITS]] ; CHECK-NEXT: ret i32 [[T2]] ; %t0 = lshr i32 %data, %nbits @@ -139,7 +130,7 @@ ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = shl i32 [[T0]], [[NBITS]] ; CHECK-NEXT: call void @use32(i32 [[T1]]) -; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[DATA]], [[NBITS]] ; CHECK-NEXT: ret i32 [[T2]] ; %t0 = lshr i32 %data, %nbits @@ -156,7 +147,7 @@ ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = shl i32 [[T0]], [[NBITS]] ; CHECK-NEXT: call void @use32(i32 [[T1]]) -; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[DATA]], [[NBITS]] ; CHECK-NEXT: ret i32 [[T2]] ; %t0 = ashr i32 %data, %nbits @@ -173,7 +164,7 @@ ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; 
CHECK-NEXT: [[T1:%.*]] = shl i32 [[T0]], [[NBITS]] ; CHECK-NEXT: call void @use32(i32 [[T1]]) -; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[DATA]], [[NBITS]] ; CHECK-NEXT: ret i32 [[T2]] ; %t0 = ashr i32 %data, %nbits @@ -191,9 +182,7 @@ define i32 @t11_commutative(i32 %nbits) { ; CHECK-LABEL: @t11_commutative( ; CHECK-NEXT: [[DATA:%.*]] = call i32 @gen32() -; CHECK-NEXT: [[T0:%.*]] = shl i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[DATA]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[T1]], [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[DATA]], [[NBITS:%.*]] ; CHECK-NEXT: ret i32 [[T2]] ; %data = call i32 @gen32()