Index: llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -296,6 +296,48 @@ return BinaryOperator::Create(Instruction::And, NewShift, NewMask); } +/// If we have a shift-by-constant of a bitwise logic op that itself has a +/// shift-by-constant operand with identical opcode, we may be able to convert +/// that into 2 independent shifts followed by the logic op. This eliminates a +/// a use of an intermediate value (reduces dependency chain). +static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.isShift() && "Expected a shift as input"); + auto *LogicInst = dyn_cast(I.getOperand(0)); + if (!LogicInst || !LogicInst->isBitwiseLogicOp() || !LogicInst->hasOneUse()) + return nullptr; + + const APInt *C0, *C1; + if (!match(I.getOperand(1), m_APInt(C1))) + return nullptr; + + Instruction::BinaryOps ShiftOpcode = I.getOpcode(); + Type *Ty = I.getType(); + + // Find a matching one-use shift by constant. The fold is not valid if the sum + // of the shift values equals or exceeds bitwidth. + Value *X, *Y; + auto matchFirstShift = [&](Value *V) { + return match(V, m_OneUse(m_Shift(m_Value(X), m_APInt(C0)))) && + cast(V)->getOpcode() == ShiftOpcode && + (*C0 + *C1).ult(Ty->getScalarSizeInBits()); + }; + + // Logic ops are commutative, so check each operand for a match. + if (matchFirstShift(LogicInst->getOperand(0))) + Y = LogicInst->getOperand(1); + else if (matchFirstShift(LogicInst->getOperand(1))) + Y = LogicInst->getOperand(0); + else + return nullptr; + + // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1) + Constant *ShiftSumC = ConstantInt::get(Ty, *C0 + *C1); + Value *NewShift1 = Builder.CreateBinOp(ShiftOpcode, X, ShiftSumC); + Value *NewShift2 = Builder.CreateBinOp(ShiftOpcode, Y, I.getOperand(1)); + return BinaryOperator::Create(LogicInst->getOpcode(), NewShift1, NewShift2); +} + Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); assert(Op0->getType() == Op1->getType()); @@ -348,6 +390,9 @@ return &I; } + if (Instruction *Logic = foldShiftOfShiftedLogic(I, Builder)) + return Logic; + return nullptr; } Index: llvm/test/Transforms/InstCombine/bswap.ll =================================================================== --- llvm/test/Transforms/InstCombine/bswap.ll +++ llvm/test/Transforms/InstCombine/bswap.ll @@ -169,10 +169,10 @@ define i32 @bswap32_shl_first_extra_use(i32 %x) { ; CHECK-LABEL: @bswap32_shl_first_extra_use( -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 16 -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], 16 -; CHECK-NEXT: [[SWAPHALF:%.*]] = or i32 [[SHL]], [[SHR]] -; CHECK-NEXT: [[T:%.*]] = shl i32 [[SWAPHALF]], 8 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X]], 24 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[SHR]], 8 +; CHECK-NEXT: [[T:%.*]] = or i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 [[X]]) ; CHECK-NEXT: call void @extra_use(i32 [[T]]) ; CHECK-NEXT: ret i32 [[BSWAP]] Index: llvm/test/Transforms/InstCombine/shift-logic.ll =================================================================== --- llvm/test/Transforms/InstCombine/shift-logic.ll +++ llvm/test/Transforms/InstCombine/shift-logic.ll @@ -3,9 +3,9 @@ define i8 @shl_and(i8 %x, i8 %y) { ; CHECK-LABEL: @shl_and( -; CHECK-NEXT: [[SH0:%.*]] = shl i8 [[X:%.*]], 3 -; CHECK-NEXT: [[R:%.*]] = and i8 [[SH0]], [[Y:%.*]] -; CHECK-NEXT: [[SH1:%.*]] = shl i8 [[R]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X:%.*]], 5 +; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[Y:%.*]], 2 +; CHECK-NEXT: [[SH1:%.*]] = and i8 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i8 [[SH1]] ; %sh0 = shl i8 %x, 3 @@ -17,9 +17,9 @@ define i16 @shl_or(i16 %x, i16 %py) { ; CHECK-LABEL: @shl_or( ; CHECK-NEXT: [[Y:%.*]] = srem i16 [[PY:%.*]], 42 -; CHECK-NEXT: [[SH0:%.*]] = shl i16 [[X:%.*]], 5 -; CHECK-NEXT: [[R:%.*]] = or i16 [[Y]], [[SH0]] -; CHECK-NEXT: [[SH1:%.*]] = shl i16 [[R]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = shl i16 [[X:%.*]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i16 [[Y]], 7 +; CHECK-NEXT: [[SH1:%.*]] = or i16 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i16 [[SH1]] ; %y = srem i16 %py, 42 ; thwart complexity-based canonicalization @@ -31,9 +31,9 @@ define i32 @shl_xor(i32 %x, i32 %y) { ; CHECK-LABEL: @shl_xor( -; CHECK-NEXT: [[SH0:%.*]] = shl i32 [[X:%.*]], 5 -; CHECK-NEXT: [[R:%.*]] = xor i32 [[SH0]], [[Y:%.*]] -; CHECK-NEXT: [[SH1:%.*]] = shl i32 [[R]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[Y:%.*]], 7 +; CHECK-NEXT: [[SH1:%.*]] = xor i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[SH1]] ; %sh0 = shl i32 %x, 5 @@ -45,9 +45,9 @@ define i64 @lshr_and(i64 %x, i64 %py) { ; CHECK-LABEL: @lshr_and( ; CHECK-NEXT: [[Y:%.*]] = srem i64 [[PY:%.*]], 42 -; CHECK-NEXT: [[SH0:%.*]] = lshr i64 [[X:%.*]], 5 -; CHECK-NEXT: [[R:%.*]] = and i64 [[Y]], [[SH0]] -; CHECK-NEXT: [[SH1:%.*]] = lshr i64 [[R]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[Y]], 7 +; CHECK-NEXT: [[SH1:%.*]] = and i64 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i64 [[SH1]] ; %y = srem i64 %py, 42 ; thwart complexity-based canonicalization @@ -59,9 +59,9 @@ define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @lshr_or( -; CHECK-NEXT: [[SH0:%.*]] = lshr <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SH0]], [[Y:%.*]] -; CHECK-NEXT: [[SH1:%.*]] = lshr <4 x i32> [[R]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[SH1]] ; %sh0 = lshr <4 x i32> %x, @@ -73,9 +73,9 @@ define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %py) { ; CHECK-LABEL: @lshr_xor( ; CHECK-NEXT: [[Y:%.*]] = srem <8 x i16> [[PY:%.*]], -; CHECK-NEXT: [[SH0:%.*]] = lshr <8 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = xor <8 x i16> [[Y]], [[SH0]] -; CHECK-NEXT: [[SH1:%.*]] = lshr <8 x i16> [[R]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[Y]], +; CHECK-NEXT: [[SH1:%.*]] = xor <8 x i16> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <8 x i16> [[SH1]] ; %y = srem <8 x i16> %py, ; thwart complexity-based canonicalization @@ -89,9 +89,9 @@ define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %py, <16 x i8> %pz) { ; CHECK-LABEL: @ashr_and( ; CHECK-NEXT: [[Y:%.*]] = srem <16 x i8> [[PY:%.*]], [[PZ:%.*]] -; CHECK-NEXT: [[SH0:%.*]] = ashr <16 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = and <16 x i8> [[Y]], [[SH0]] -; CHECK-NEXT: [[SH1:%.*]] = ashr <16 x i8> [[R]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i8> [[Y]], +; CHECK-NEXT: [[SH1:%.*]] = and <16 x i8> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <16 x i8> [[SH1]] ; %y = srem <16 x i8> %py, %pz ; thwart complexity-based canonicalization @@ -103,9 +103,9 @@ define <2 x i64> @ashr_or(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @ashr_or( -; CHECK-NEXT: [[SH0:%.*]] = ashr <2 x i64> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i64> [[SH0]], [[Y:%.*]] -; CHECK-NEXT: [[SH1:%.*]] = ashr <2 x i64> [[R]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i64> [[SH1]] ; %sh0 = ashr <2 x i64> %x, @@ -117,9 +117,9 @@ define i32 @ashr_xor(i32 %x, i32 %py) { ; CHECK-LABEL: @ashr_xor( ; CHECK-NEXT: [[Y:%.*]] = srem i32 [[PY:%.*]], 42 -; CHECK-NEXT: [[SH0:%.*]] = ashr i32 [[X:%.*]], 5 -; CHECK-NEXT: [[R:%.*]] = xor i32 [[Y]], [[SH0]] -; CHECK-NEXT: [[SH1:%.*]] = ashr i32 [[R]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = ashr i32 [[Y]], 7 +; CHECK-NEXT: [[SH1:%.*]] = xor i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[SH1]] ; %y = srem i32 %py, 42 ; thwart complexity-based canonicalization