Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -20,6 +20,50 @@ #define DEBUG_TYPE "instcombine" +// Given pattern: +// (x shiftopcode Q) shiftopcode K +// we should rewrite it as +// x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x) +// This is valid for any shift, but they must be identical. +static Instruction * +reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, + const SimplifyQuery &SQ) { + // Look for: (x shiftopcode ShAmt0) shiftopcode ShAmt1 + Value *X, *ShAmt1, *Sh1Value, *ShAmt0; + if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)), + m_Value(Sh1Value)), + m_Value(ShAmt0)))) + return nullptr; + auto *Sh1 = cast<BinaryOperator>(Sh1Value); + + // The shift opcodes must be identical. + Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode(); + if (ShiftOpcode != Sh1->getOpcode()) + return nullptr; + // Can we fold (ShAmt0+ShAmt1) ? + Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1, + SQ.getWithInstruction(Sh0)); + if (!NewShAmt) + return nullptr; // Did not simplify. + // Is the new shift amount smaller than the bit width? + // FIXME: could also rely on ConstantRange. + unsigned BitWidth = X->getType()->getScalarSizeInBits(); + if (!match(NewShAmt, m_SpecificInt_ULT(APInt(BitWidth, BitWidth)))) + return nullptr; + // All good, we can do this fold. + BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt); + // If both of the original shifts had the same flag set, preserve the flag.
+ if (ShiftOpcode == Instruction::BinaryOps::Shl) { + NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() && + Sh1->hasNoUnsignedWrap()); + NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() && + Sh1->hasNoSignedWrap()); + } else { + NewShift->setIsExact(Sh0->isExact() && Sh1->isExact()); + } + return NewShift; +} + Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); assert(Op0->getType() == Op1->getType()); @@ -38,6 +82,10 @@ if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; + if (Instruction *NewShift = + reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)) + return NewShift; + // (C1 shift (A add C2)) -> (C1 shift C2) shift A) // iff A and C2 are both positive. Value *A; Index: llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation.ll +++ llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation.ll @@ -12,10 +12,7 @@ define i32 @t0(i32 %x, i32 %y) { ; CHECK-LABEL: @t0( -; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2 -; CHECK-NEXT: [[T3:%.*]] = lshr exact i32 [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = lshr i32 [[X:%.*]], 30 ; CHECK-NEXT: ret i32 [[T3]] ; %t0 = sub i32 32, %y @@ -27,10 +24,7 @@ define <2 x i32> @t1_vec_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> <i32 32, i32 32>, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[Y]], <i32 -2, i32 -2> -; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 30, i32 30> ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t0 = sub <2 x i32> <i32 32, i32 32>, %y @@ -42,10 +36,7 @@ define <2 x i32> @t2_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL:
@t2_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> <i32 32, i32 30>, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[Y]], <i32 -2, i32 0> -; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 30, i32 30> ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t0 = sub <2 x i32> <i32 32, i32 30>, %y @@ -59,10 +50,7 @@ define <3 x i32> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i32> %y) { ; CHECK-LABEL: @t3_vec_nonsplat_undef0( -; CHECK-NEXT: [[T0:%.*]] = sub <3 x i32> <i32 32, i32 undef, i32 32>, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[Y]], <i32 -2, i32 -2, i32 -2> -; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30> ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t0 = sub <3 x i32> <i32 32, i32 undef, i32 32>, %y @@ -74,10 +62,7 @@ define <3 x i32> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i32> %y) { ; CHECK-LABEL: @t4_vec_nonsplat_undef1( -; CHECK-NEXT: [[T0:%.*]] = sub <3 x i32> <i32 32, i32 32, i32 32>, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[Y]], <i32 -2, i32 undef, i32 -2> -; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30> ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t0 = sub <3 x i32> <i32 32, i32 32, i32 32>, %y @@ -89,10 +74,7 @@ define <3 x i32> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i32> %y) { ; CHECK-LABEL: @t5_vec_nonsplat_undef1( -; CHECK-NEXT: [[T0:%.*]] = sub <3 x i32> <i32 32, i32 undef, i32 32>, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[Y]], <i32 -2, i32 undef, i32 -2> -; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30> ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t0 = sub <3 x i32> <i32 32, i32 undef, i32 32>, %y @@ -105,10 +87,7 @@ ; Some other shift opcodes define i32 @t6_shl(i32 %x, i32 %y) { ; CHECK-LABEL: @t6_shl( -; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = shl nuw i32 [[X:%.*]], [[T0]] -; CHECK-NEXT:
[[T2:%.*]] = add i32 [[Y]], -2 -; CHECK-NEXT: [[T3:%.*]] = shl nsw i32 [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X:%.*]], 30 ; CHECK-NEXT: ret i32 [[T3]] ; %t0 = sub i32 32, %y @@ -119,10 +98,7 @@ } define i32 @t7_ashr(i32 %x, i32 %y) { ; CHECK-LABEL: @t7_ashr( -; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = ashr exact i32 [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2 -; CHECK-NEXT: [[T3:%.*]] = ashr i32 [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = ashr i32 [[X:%.*]], 30 ; CHECK-NEXT: ret i32 [[T3]] ; %t0 = sub i32 32, %y @@ -135,10 +111,7 @@ ; If the same flag is present on both shifts, it can be kept. define i32 @t8_lshr_exact_flag_preservation(i32 %x, i32 %y) { ; CHECK-LABEL: @t8_lshr_exact_flag_preservation( -; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2 -; CHECK-NEXT: [[T3:%.*]] = lshr exact i32 [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = lshr exact i32 [[X:%.*]], 30 ; CHECK-NEXT: ret i32 [[T3]] ; %t0 = sub i32 32, %y @@ -149,10 +122,7 @@ } define i32 @t9_ashr_exact_flag_preservation(i32 %x, i32 %y) { ; CHECK-LABEL: @t9_ashr_exact_flag_preservation( -; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = ashr exact i32 [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2 -; CHECK-NEXT: [[T3:%.*]] = ashr exact i32 [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = ashr exact i32 [[X:%.*]], 30 ; CHECK-NEXT: ret i32 [[T3]] ; %t0 = sub i32 32, %y @@ -163,10 +133,7 @@ } define i32 @t10_shl_nuw_flag_preservation(i32 %x, i32 %y) { ; CHECK-LABEL: @t10_shl_nuw_flag_preservation( -; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = shl nuw i32 [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2 -; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw i32 [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = shl nuw i32 [[X:%.*]], 30 ; CHECK-NEXT: ret i32 [[T3]] ; %t0 = sub 
i32 32, %y @@ -177,10 +144,7 @@ } define i32 @t11_shl_nsw_flag_preservation(i32 %x, i32 %y) { ; CHECK-LABEL: @t11_shl_nsw_flag_preservation( -; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = shl nsw i32 [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2 -; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw i32 [[T1]], [[T2]] +; CHECK-NEXT: [[T3:%.*]] = shl nsw i32 [[X:%.*]], 30 ; CHECK-NEXT: ret i32 [[T3]] ; %t0 = sub i32 32, %y