diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -630,6 +630,21 @@
             ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);
         if (SignBits >= NumHiDemandedBits)
           return I->getOperand(0);
+
+        // If we can pre-shift a left-shifted constant to the right without
+        // losing any low bits (we already know we don't demand the high bits),
+        // then eliminate the right-shift:
+        // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X
+        Value *X;
+        Constant *C;
+        if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) {
+          Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
+          Constant *NewC = ConstantExpr::getLShr(C, RightShiftAmtC);
+          if (ConstantExpr::getShl(NewC, RightShiftAmtC) == C) {
+            Instruction *Shl = BinaryOperator::CreateShl(NewC, X);
+            return InsertNewInstWith(Shl, *I);
+          }
+        }
       }

       // Unsigned shift right.
diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll
--- a/llvm/test/Transforms/InstCombine/shift-shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift-shift.ll
@@ -421,11 +421,12 @@
   ret i32 %r
 }

+; Pre-shift a constant to eliminate lshr.
+
 define i8 @shl_lshr_demand1(i8 %x) {
 ; CHECK-LABEL: @shl_lshr_demand1(
-; CHECK-NEXT:    [[SHL:%.*]] = shl i8 40, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact i8 [[SHL]], 3
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[LSHR]], -32
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 5, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP1]], -32
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %shl = shl i8 40, %x ; 0b0010_1000
@@ -434,11 +435,13 @@
   ret i8 %r
 }

+; Pre-shift a constant to eliminate disguised lshr.
+
 define i8 @shl_ashr_demand2(i8 %x) {
 ; CHECK-LABEL: @shl_ashr_demand2(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 40, [[X:%.*]]
 ; CHECK-NEXT:    call void @use8(i8 [[SHL]])
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i8 [[SHL]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 5, [[X]]
 ; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP1]], -32
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -449,6 +452,8 @@
   ret i8 %r
 }

+; It is not safe to pre-shift because we demand an extra high bit.
+
 define i8 @shl_lshr_demand3(i8 %x) {
 ; CHECK-LABEL: @shl_lshr_demand3(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 40, [[X:%.*]]
@@ -462,6 +467,8 @@
   ret i8 %r
 }

+; It is not valid to pre-shift because we lose the low bit of 44.
+
 define i8 @shl_lshr_demand4(i8 %x) {
 ; CHECK-LABEL: @shl_lshr_demand4(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 44, [[X:%.*]]
@@ -475,11 +482,12 @@
   ret i8 %r
 }

+; Splat vectors work too, and we don't care what instruction reduces demand for high bits.
+
 define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact <2 x i8> [[SHL]], <i8 2, i8 2>
-; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 37>, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
 ; CHECK-NEXT:    ret <2 x i6> [[R]]
 ;
   %shl = shl <2 x i8> <i8 -108, i8 -108>, %x ; 0b1001_0100
@@ -488,6 +496,8 @@
   ret <2 x i6> %r
 }

+; TODO: allow undef/poison elements for this transform.
+
 define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_undef_left(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]]
@@ -501,6 +511,8 @@
   ret <2 x i6> %r
 }

+; TODO: allow undef/poison elements for this transform.
+
 define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_undef_right(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]]
@@ -514,6 +526,8 @@
   ret <2 x i6> %r
 }

+; TODO: allow non-splat vector constants.
+
 define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_left(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
@@ -527,11 +541,12 @@
   ret <2 x i6> %r
 }

+; non-splat shl constant is ok.
+
 define <2 x i6> @shl_lshr_demand5_nonuniform_vec_right(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_right(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -112>, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 2, i8 2>
-; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 36>, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
 ; CHECK-NEXT:    ret <2 x i6> [[R]]
 ;
   %shl = shl <2 x i8> <i8 -108, i8 -112>, %x ; 0b1001_0100, 0b1001_0000
@@ -540,6 +555,8 @@
   ret <2 x i6> %r
 }

+; This is possible, but may require significant changes to the demanded bits framework.
+
 define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_both(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]]
@@ -553,11 +570,12 @@
   ret <2 x i6> %r
 }

+; 'and' can reduce demand for high bits too.
+
 define i16 @shl_lshr_demand6(i16 %x) {
 ; CHECK-LABEL: @shl_lshr_demand6(
-; CHECK-NEXT:    [[SHL:%.*]] = shl i16 -32624, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact i16 [[SHL]], 4
-; CHECK-NEXT:    [[R:%.*]] = and i16 [[LSHR]], 4094
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i16 2057, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and i16 [[TMP1]], 4094
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %shl = shl i16 32912, %x ; 0b1000_0000_1001_0000