diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1383,6 +1383,9 @@
   if (Instruction *X = foldNoWrapAdd(I, Builder))
     return X;
 
+  if (Instruction *R = foldBinOpShiftWithShift(I))
+    return R;
+
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2155,6 +2155,9 @@
   if (Value *V = SimplifyBSwap(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  if (Instruction *R = foldBinOpShiftWithShift(I))
+    return R;
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   Value *X, *Y;
 
@@ -3202,6 +3205,9 @@
   if (Instruction *Concat = matchOrConcat(I, Builder))
     return replaceInstUsesWith(I, Concat);
 
+  if (Instruction *R = foldBinOpShiftWithShift(I))
+    return R;
+
   Value *X, *Y;
   const APInt *CV;
   if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
@@ -4275,6 +4281,9 @@
   if (Instruction *R = foldNot(I))
     return R;
 
+  if (Instruction *R = foldBinOpShiftWithShift(I))
+    return R;
+
   // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
   // This is a special case in haveNoCommonBitsSet, but the computeKnownBits
   // calls in there are unnecessary as SimplifyDemandedInstructionBits should
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -450,6 +450,12 @@
   Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS,
                                         Value *RHS);
 
+  // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
+  //    -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
+  // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
+  //    -> (BinOp (logic_shift (BinOp X, Y)), Mask)
+  Instruction *foldBinOpShiftWithShift(BinaryOperator &I);
+
   /// This tries to simplify binary operations by factorizing out common terms
   /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
   Value *tryFactorizationFolds(BinaryOperator &I);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -730,6 +730,142 @@
   return RetVal;
 }
 
+// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
+//    IFF
+//       1) the logic_shifts match
+//       2) BinOp1 is `and`, or the binop/shift pair is completely
+//          distributable (anything but `add` + `lshr`) and either BinOp2 is
+//          `and` or (logic_shift (inv_logic_shift C1, C), C) == C1
+//
+//    -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
+//
+// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
+//    IFF
+//       1) the logic_shifts match
+//       2) BinOp1 == BinOp2 (if BinOp is `add`, the shift must also be `shl`)
+//
+//    -> (BinOp (logic_shift (BinOp X, Y)), Mask)
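+//
+// An illustrative example of the first fold (hypothetical IR, not taken from
+// the tests below), with Binop1 = or, Binop2 = and, logic_shift = shl, C = 4
+// and C1 = 48:
+//      %sx = shl i8 %x, 4
+//      %sy = shl i8 %y, 4
+//      %b  = and i8 %sx, 48
+//      %r  = or i8 %b, %sy
+//   becomes
+//      %i  = and i8 %x, 3       ; 3 == inv_logic_shift(48, 4) == 48 lshr 4
+//      %o  = or i8 %i, %y
+//      %r  = shl i8 %o, 4
+// In the second fold, BinOp == `add` requires `shl` because `lshr` does not
+// distribute over addition once a carry crosses the shift amount: for i8 with
+// Amt = 4, ((8 + 8) lshr 4) == 1 while ((8 lshr 4) + (8 lshr 4)) == 0.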
+Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
+  auto IsValidBinOpc = [](unsigned Opc) {
+    switch (Opc) {
+    default:
+      return false;
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor:
+    case Instruction::Add:
+      // Skip Sub as we only match constant masks which will canonicalize to
+      // use add.
+      return true;
+    }
+  };
+
+  // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
+  // constraints.
+  auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
+                                      unsigned ShOpc) {
+    return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
+           ShOpc == Instruction::Shl;
+  };
+
+  auto GetInvShift = [](unsigned ShOpc) {
+    return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
+  };
+
+  auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
+                                 unsigned ShOpc, Constant *CMask,
+                                 Constant *CShift) {
+    // If BinOp1 is `and` we don't need to check the mask.
+    if (BinOpc1 == Instruction::And)
+      return true;
+
+    // For all other possible transforms we need a completely distributable
+    // binop/shift (anything but `add` + `lshr`).
+    if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
+      return false;
+
+    // If BinOp2 is `and`, any mask works (this only really helps for non-splat
+    // vecs, otherwise the mask will be simplified and the following check will
+    // handle it).
+    if (BinOpc2 == Instruction::And)
+      return true;
+
+    // Otherwise, we need a mask that meets the below requirement.
+    // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
+    return ConstantExpr::get(
+               ShOpc, ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift),
+               CShift) == CMask;
+  };
+
+  auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
+    Constant *CMask, *CShift;
+    Value *X, *Y, *ShiftedX, *Mask, *Shift;
+    if (!match(I.getOperand(ShOpnum),
+               m_OneUse(m_LogicalShift(m_Value(Y), m_Value(Shift)))))
+      return nullptr;
+    if (!match(I.getOperand(1 - ShOpnum),
+               m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
+      return nullptr;
+
+    if (!match(ShiftedX,
+               m_OneUse(m_LogicalShift(m_Value(X), m_Specific(Shift)))))
+      return nullptr;
+
+    // Make sure we are matching instruction shifts and not ConstantExpr
+    auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
+    auto *IX = dyn_cast<Instruction>(ShiftedX);
+    if (!IY || !IX)
+      return nullptr;
+
+    // LHS and RHS need the same shift opcode
+    unsigned ShOpc = IY->getOpcode();
+    if (ShOpc != IX->getOpcode())
+      return nullptr;
+
+    // Make sure the binop is a real instruction and not a ConstantExpr
+    auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
+    if (!BO2)
+      return nullptr;
+
+    unsigned BinOpc = BO2->getOpcode();
+    // Make sure we have valid binops.
+    if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
+      return nullptr;
+
+    // If BinOp1 == BinOp2 and it's bitwise, or `shl` with `add`, then just
+    // distribute to drop the shift, regardless of the constants.
+    if (BinOpc == I.getOpcode() &&
+        IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
+      Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
+      Value *NewBinOp1 = Builder.CreateBinOp(
+          static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
+      return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
+    }
+
+    // Otherwise we can only distribute by constant shifting the mask, so
+    // ensure we have constants.
+    if (!match(Shift, m_ImmConstant(CShift)))
+      return nullptr;
+    if (!match(Mask, m_ImmConstant(CMask)))
+      return nullptr;
+
+    // Check if we can distribute the binops.
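+    // For example (hypothetical constants, not from the tests below): with a
+    // `shl` by 4, the mask 48 survives the round trip since
+    // shl(lshr(48, 4), 4) == 48, but the mask 50 does not, as
+    // shl(lshr(50, 4), 4) == 48 != 50 would silently drop the mask's low bits.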
+    if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
+      return nullptr;
+
+    Constant *NewCMask = ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift);
+    Value *NewBinOp2 = Builder.CreateBinOp(
+        static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
+    Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
+    return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
+                                  NewBinOp1, CShift);
+  };
+
+  if (Instruction *R = MatchBinOp(0))
+    return R;
+  return MatchBinOp(1);
+}
+
 Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll
--- a/llvm/test/Transforms/InstCombine/and-xor-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll
@@ -356,10 +356,9 @@
 define i8 @and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: define {{[^@]+}}@and_shl
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
-; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X]], [[SHAMT]]
-; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[Z]]
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[SY]], [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[TMP1]], [[SHAMT]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt
@@ -372,10 +371,9 @@
 define i8 @or_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: define {{[^@]+}}@or_shl
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
-; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X]], [[SHAMT]]
-; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[TMP1]], [[SHAMT]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt
@@ -424,10 +422,9 @@
 define i8 @or_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: define {{[^@]+}}@or_lshr
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
-; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]]
-; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[SY]], [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt
@@ -440,10 +437,9 @@
 define i8 @xor_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: define {{[^@]+}}@xor_lshr
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
-; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]]
-; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SX]], [[Z]]
-; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]]
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt
@@ -565,9 +561,10 @@
 ; CHECK-LABEL: define {{[^@]+}}@xor_lshr_multiuse
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
 ; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]]
-; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]]
 ; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SX]], [[Z]]
-; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]]
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT:    [[R2:%.*]] = sdiv i8 [[A]], [[R]]
 ; CHECK-NEXT:    ret i8 [[R2]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
--- a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
@@ -3,10 +3,9 @@
 define i8 @shl_and_and(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_and_and(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl i8 [[X:%.*]], 4
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl i8 [[Y:%.*]], 4
-; CHECK-NEXT:    [[BW2:%.*]] = and i8 [[SHIFT2]], 80
-; CHECK-NEXT:    [[BW1:%.*]] = and i8 [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[TMP1]], 4
+; CHECK-NEXT:    [[BW1:%.*]] = and i8 [[TMP2]], 80
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = shl i8 %x, 4
@@ -33,10 +32,9 @@
 define i8 @shl_add_add(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_add_add(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl i8 [[X:%.*]], 2
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl i8 [[Y:%.*]], 2
-; CHECK-NEXT:    [[BW2:%.*]] = add i8 [[SHIFT2]], 48
-; CHECK-NEXT:    [[BW1:%.*]] = add i8 [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[TMP1]], 2
+; CHECK-NEXT:    [[BW1:%.*]] = add i8 [[TMP2]], 48
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = shl i8 %x, 2
@@ -78,10 +76,9 @@
 define <2 x i8> @lshr_and_or(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_and_or(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = lshr <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]],
-; CHECK-NEXT:    [[BW2:%.*]] = and <2 x i8> [[SHIFT1]],
-; CHECK-NEXT:    [[BW1:%.*]] = or <2 x i8> [[SHIFT2]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = lshr <2 x i8> [[TMP2]],
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
   %shift1 = lshr <2 x i8> %x,
@@ -108,10 +105,9 @@
 define i8 @shl_and_xor(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_and_xor(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl i8 [[X:%.*]], 1
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl i8 [[Y:%.*]], 1
-; CHECK-NEXT:    [[BW2:%.*]] = and i8 [[SHIFT1]], 20
-; CHECK-NEXT:    [[BW1:%.*]] = xor i8 [[SHIFT2]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = shl i8 [[TMP2]], 1
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = shl i8 %x, 1
@@ -123,10 +119,9 @@
 define i8 @shl_and_add(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_and_add(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl i8 [[X:%.*]], 1
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl i8 [[Y:%.*]], 1
-; CHECK-NEXT:    [[BW2:%.*]] = and i8 [[SHIFT2]], 118
-; CHECK-NEXT:    [[BW1:%.*]] = add i8 [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 59
+; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = shl i8 [[TMP2]], 1
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = shl i8 %x, 1
@@ -153,10 +148,9 @@
 define i8 @lshr_or_and(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_or_and(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = lshr i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[SHIFT2:%.*]] = lshr i8 [[Y:%.*]], 5
-; CHECK-NEXT:    [[BW2:%.*]] = or i8 [[SHIFT1]], 6
-; CHECK-NEXT:    [[BW1:%.*]] = and i8 [[BW2]], [[SHIFT2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X:%.*]], -64
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = lshr i8 [[TMP2]], 5
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = lshr i8 %x, 5
@@ -168,10 +162,9 @@
 define i8 @lshr_or_or_fail(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_or_or_fail(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = lshr i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[SHIFT2:%.*]] = lshr i8 [[Y:%.*]], 5
-; CHECK-NEXT:    [[BW2:%.*]] = or i8 [[SHIFT2]], -58
-; CHECK-NEXT:    [[BW1:%.*]] = or i8 [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i8 [[TMP1]], 5
+; CHECK-NEXT:    [[BW1:%.*]] = or i8 [[TMP2]], -58
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = lshr i8 %x, 5
@@ -183,10 +176,9 @@
 define <2 x i8> @shl_xor_and(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_xor_and(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]],
-; CHECK-NEXT:    [[BW2:%.*]] = xor <2 x i8> [[SHIFT2]],
-; CHECK-NEXT:    [[BW1:%.*]] = and <2 x i8> [[BW2]], [[SHIFT1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = shl <2 x i8> [[TMP2]],
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
   %shift1 = shl <2 x i8> %x,
@@ -213,10 +205,9 @@
 define i8 @lshr_or_or_no_const(i8 %x, i8 %y, i8 %sh, i8 %mask) {
 ; CHECK-LABEL: @lshr_or_or_no_const(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = lshr i8 [[X:%.*]], [[SH:%.*]]
-; CHECK-NEXT:    [[SHIFT2:%.*]] = lshr i8 [[Y:%.*]], [[SH]]
-; CHECK-NEXT:    [[BW2:%.*]] = or i8 [[SHIFT2]], [[MASK:%.*]]
-; CHECK-NEXT:    [[BW1:%.*]] = or i8 [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SH:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = or i8 [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = lshr i8 %x, %sh
@@ -243,10 +234,9 @@
 define i8 @shl_xor_xor_no_const(i8 %x, i8 %y, i8 %sh, i8 %mask) {
 ; CHECK-LABEL: @shl_xor_xor_no_const(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl i8 [[X:%.*]], [[SH:%.*]]
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl i8 [[Y:%.*]], [[SH]]
-; CHECK-NEXT:    [[BW2:%.*]] = xor i8 [[SHIFT2]], [[MASK:%.*]]
-; CHECK-NEXT:    [[BW1:%.*]] = xor i8 [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[TMP1]], [[SH:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = xor i8 [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = shl i8 %x, %sh
@@ -273,10 +263,9 @@
 define <2 x i8> @shl_and_and_no_const(<2 x i8> %x, <2 x i8> %y, <2 x i8> %sh, <2 x i8> %mask) {
 ; CHECK-LABEL: @shl_and_and_no_const(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]], [[SH:%.*]]
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]], [[SH]]
-; CHECK-NEXT:    [[BW2:%.*]] = and <2 x i8> [[SHIFT2]], [[MASK:%.*]]
-; CHECK-NEXT:    [[BW1:%.*]] = and <2 x i8> [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], [[SH:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = and <2 x i8> [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
   %shift1 = shl <2 x i8> %x, %sh
@@ -288,10 +277,9 @@
 define i8 @shl_add_add_no_const(i8 %x, i8 %y, i8 %sh, i8 %mask) {
 ; CHECK-LABEL: @shl_add_add_no_const(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl i8 [[X:%.*]], [[SH:%.*]]
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl i8 [[Y:%.*]], [[SH]]
-; CHECK-NEXT:    [[BW2:%.*]] = add i8 [[SHIFT2]], [[MASK:%.*]]
-; CHECK-NEXT:    [[BW1:%.*]] = add i8 [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[TMP1]], [[SH:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = add i8 [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = shl i8 %x, %sh
@@ -318,10 +306,9 @@
 define <2 x i8> @lshr_add_and(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_add_and(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = lshr <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]],
-; CHECK-NEXT:    [[BW2:%.*]] = add <2 x i8> [[SHIFT2]],
-; CHECK-NEXT:    [[BW1:%.*]] = and <2 x i8> [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = lshr <2 x i8> [[TMP2]],
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
   %shift1 = lshr <2 x i8> %x,
@@ -348,10 +335,9 @@
 define <2 x i8> @shl_or_or_good_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_or_or_good_mask(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]],
-; CHECK-NEXT:    [[BW2:%.*]] = or <2 x i8> [[SHIFT2]],
-; CHECK-NEXT:    [[BW1:%.*]] = or <2 x i8> [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i8> [[TMP1]],
+; CHECK-NEXT:    [[BW1:%.*]] = or <2 x i8> [[TMP2]],
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
   %shift1 = shl <2 x i8> %x,
@@ -363,10 +349,9 @@
 define <2 x i8> @shl_or_or_fail_bad_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_or_or_fail_bad_mask(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]],
-; CHECK-NEXT:    [[BW2:%.*]] = or <2 x i8> [[SHIFT2]],
-; CHECK-NEXT:    [[BW1:%.*]] = or <2 x i8> [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i8> [[TMP1]],
+; CHECK-NEXT:    [[BW1:%.*]] = or <2 x i8> [[TMP2]],
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
   %shift1 = shl <2 x i8> %x,
@@ -378,10 +363,9 @@
 define i8 @lshr_xor_or_good_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_xor_or_good_mask(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = lshr i8 [[X:%.*]], 4
-; CHECK-NEXT:    [[SHIFT2:%.*]] = lshr i8 [[Y:%.*]], 4
-; CHECK-NEXT:    [[BW21:%.*]] = or i8 [[SHIFT2]], 48
-; CHECK-NEXT:    [[BW1:%.*]] = or i8 [[SHIFT1]], [[BW21]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i8 [[TMP1]], 4
+; CHECK-NEXT:    [[BW1:%.*]] = or i8 [[TMP2]], 48
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = lshr i8 %x, 4
@@ -408,10 +392,9 @@
 define <2 x i8> @lshr_or_xor_good_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_or_xor_good_mask(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = lshr <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]],
-; CHECK-NEXT:    [[BW2:%.*]] = or <2 x i8> [[SHIFT2]],
-; CHECK-NEXT:    [[BW1:%.*]] = xor <2 x i8> [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = lshr <2 x i8> [[TMP2]],
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
   %shift1 = lshr <2 x i8> %x,
@@ -438,9 +421,9 @@
 define i8 @shl_xor_xor_good_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_xor_xor_good_mask(
-; CHECK-NEXT:    [[SHIFT21:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[SHIFT21]], 1
-; CHECK-NEXT:    [[BW1:%.*]] = xor i8 [[TMP1]], 88
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[TMP1]], 1
+; CHECK-NEXT:    [[BW1:%.*]] = xor i8 [[TMP2]], 88
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = shl i8 %x, 1
@@ -452,9 +435,9 @@
 define i8 @shl_xor_xor_bad_mask_distribute(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_xor_xor_bad_mask_distribute(
-; CHECK-NEXT:    [[SHIFT21:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[SHIFT21]], 1
-; CHECK-NEXT:    [[BW1:%.*]] = xor i8 [[TMP1]], -68
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[TMP1]], 1
+; CHECK-NEXT:    [[BW1:%.*]] = xor i8 [[TMP2]], -68
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = shl i8 %x, 1
@@ -466,10 +449,9 @@
 define i8 @shl_add_and(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_add_and(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl i8 [[X:%.*]], 1
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl i8 [[Y:%.*]], 1
-; CHECK-NEXT:    [[BW2:%.*]] = add i8 [[SHIFT2]], 123
-; CHECK-NEXT:    [[BW1:%.*]] = and i8 [[SHIFT1]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[Y:%.*]], 61
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = shl i8 [[TMP2]], 1
 ; CHECK-NEXT:    ret i8 [[BW1]]
 ;
   %shift1 = shl i8 %x, 1
@@ -526,10 +508,9 @@
 define <2 x i8> @lshr_and_add(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_and_add(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]],
-; CHECK-NEXT:    [[BW2:%.*]] = and <2 x i8> [[SHIFT1]],
-; CHECK-NEXT:    [[BW1:%.*]] = add <2 x i8> [[SHIFT2]], [[BW2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[BW1:%.*]] = shl <2 x i8> [[TMP2]],
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
   %shift1 = shl <2 x i8> %x,
diff --git a/llvm/test/Transforms/InstCombine/or-shifted-masks.ll b/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
--- a/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
+++ b/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
@@ -75,16 +75,10 @@
 define i32 @multiuse2(i32 %x) {
 ; CHECK-LABEL: @multiuse2(
-; CHECK-NEXT:    [[I:%.*]] = shl i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[I2:%.*]] = and i32 [[I]], 12
-; CHECK-NEXT:    [[I6:%.*]] = shl i32 [[X]], 8
-; CHECK-NEXT:    [[I7:%.*]] = and i32 [[I6]], 24576
-; CHECK-NEXT:    [[I14:%.*]] = shl i32 [[X]], 8
-; CHECK-NEXT:    [[I9:%.*]] = and i32 [[I14]], 7680
-; CHECK-NEXT:    [[I10:%.*]] = or i32 [[I7]], [[I9]]
-; CHECK-NEXT:    [[I85:%.*]] = shl i32 [[X]], 1
-; CHECK-NEXT:    [[I11:%.*]] = and i32 [[I85]], 240
-; CHECK-NEXT:    [[I12:%.*]] = or i32 [[I2]], [[I11]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[X:%.*]], 8
+; CHECK-NEXT:    [[I10:%.*]] = and i32 [[TMP1]], 32256
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[X]], 1
+; CHECK-NEXT:    [[I12:%.*]] = and i32 [[TMP2]], 252
 ; CHECK-NEXT:    [[I13:%.*]] = or i32 [[I10]], [[I12]]
 ; CHECK-NEXT:    ret i32 [[I13]]
 ;
@@ -107,15 +101,10 @@
 define i32 @multiuse3(i32 %x) {
 ; CHECK-LABEL: @multiuse3(
-; CHECK-NEXT:    [[I:%.*]] = and i32 [[X:%.*]], 96
-; CHECK-NEXT:    [[I1:%.*]] = shl nuw nsw i32 [[I]], 6
-; CHECK-NEXT:    [[I2:%.*]] = lshr exact i32 [[I]], 1
-; CHECK-NEXT:    [[I3:%.*]] = shl i32 [[X]], 6
-; CHECK-NEXT:    [[I4:%.*]] = and i32 [[I3]], 1920
-; CHECK-NEXT:    [[I5:%.*]] = or i32 [[I1]], [[I4]]
-; CHECK-NEXT:    [[I6:%.*]] = lshr i32 [[X]], 1
-; CHECK-NEXT:    [[I7:%.*]] = and i32 [[I6]], 15
-; CHECK-NEXT:    [[I8:%.*]] = or i32 [[I2]], [[I7]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[X:%.*]], 6
+; CHECK-NEXT:    [[I5:%.*]] = and i32 [[TMP1]], 8064
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[X]], 1
+; CHECK-NEXT:    [[I8:%.*]] = and i32 [[TMP2]], 63
 ; CHECK-NEXT:    [[I9:%.*]] = or i32 [[I8]], [[I5]]
 ; CHECK-NEXT:    ret i32 [[I9]]
 ;
@@ -134,20 +123,18 @@
 define i32 @multiuse4(i32 %x) local_unnamed_addr {
 ; CHECK-LABEL: @multiuse4(
-; CHECK-NEXT:    [[I:%.*]] = and i32 [[X:%.*]], 100663296
-; CHECK-NEXT:    [[I1:%.*]] = icmp sgt i32 [[X]], -1
+; CHECK-NEXT:    [[I1:%.*]] = icmp sgt i32 [[X:%.*]], -1
 ; CHECK-NEXT:    br i1 [[I1]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
-; CHECK-NEXT:    [[I2:%.*]] = lshr exact i32 [[I]], 22
+; CHECK-NEXT:    [[I:%.*]] = lshr i32 [[X]], 22
+; CHECK-NEXT:    [[I2:%.*]] = and i32 [[I]], 24
 ; CHECK-NEXT:    [[I3:%.*]] = lshr i32 [[X]], 22
 ; CHECK-NEXT:    [[I4:%.*]] = and i32 [[I3]], 480
 ; CHECK-NEXT:    [[I5:%.*]] = or i32 [[I4]], [[I2]]
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       else:
-; CHECK-NEXT:    [[I6:%.*]] = lshr exact i32 [[I]], 17
-; CHECK-NEXT:    [[I7:%.*]] = lshr i32 [[X]], 17
-; CHECK-NEXT:    [[I8:%.*]] = and i32 [[I7]], 15360
-; CHECK-NEXT:    [[I9:%.*]] = or i32 [[I8]], [[I6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 17
+; CHECK-NEXT:    [[I9:%.*]] = and i32 [[TMP1]], 16128
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
 ; CHECK-NEXT:    [[I10:%.*]] = phi i32 [ [[I5]], [[IF]] ], [ [[I9]], [[ELSE]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
--- a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
+++ b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
@@ -60,43 +60,36 @@
 ; CHECK-NEXT:    [[IDX_NEG_1:%.*]] = sub nsw i64 0, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ADD_PTR_1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_1:%.*]] = shl i32 [[TMP8]], 1
-; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_1]], [[SUM_11_1]]
 ; CHECK-NEXT:    [[IDX_NEG_1_1:%.*]] = xor i64 [[INDVARS_IV_1]], -1
 ; CHECK-NEXT:    [[ADD_PTR_1_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_1]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_1_1:%.*]] = shl i32 [[TMP9]], 1
-; CHECK-NEXT:    [[ADD_1_1:%.*]] = add i32 [[MUL_1_1]], [[ADD_1]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
 ; CHECK-NEXT:    [[IDX_NEG_1_2:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_2]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_1_2:%.*]] = shl i32 [[TMP10]], 1
-; CHECK-NEXT:    [[ADD_1_2:%.*]] = add i32 [[MUL_1_2]], [[ADD_1_1]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
 ; CHECK-NEXT:    [[IDX_NEG_1_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_3]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_1_3:%.*]] = shl i32 [[TMP11]], 1
-; CHECK-NEXT:    [[ADD_1_3:%.*]] = add i32 [[MUL_1_3]], [[ADD_1_2]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
 ; CHECK-NEXT:    [[IDX_NEG_1_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_4]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_1_4:%.*]] = shl i32 [[TMP12]], 1
-; CHECK-NEXT:    [[ADD_1_4:%.*]] = add i32 [[MUL_1_4]], [[ADD_1_3]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP15]]
 ; CHECK-NEXT:    [[IDX_NEG_1_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_5]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_1_5:%.*]] = shl i32 [[TMP13]], 1
-; CHECK-NEXT:    [[ADD_1_5:%.*]] = add i32 [[MUL_1_5]], [[ADD_1_4]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
 ; CHECK-NEXT:    [[IDX_NEG_1_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_6]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_1_6:%.*]] = shl i32 [[TMP14]], 1
-; CHECK-NEXT:    [[ADD_1_6:%.*]] = add i32 [[MUL_1_6]], [[ADD_1_5]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP18]], [[TMP19]]
 ; CHECK-NEXT:    [[IDX_NEG_1_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_7]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_1_7:%.*]] = shl i32 [[TMP15]], 1
-; CHECK-NEXT:    [[ADD_1_7]] = add i32 [[MUL_1_7]], [[ADD_1_6]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = shl i32 [[TMP22]], 1
+; CHECK-NEXT:    [[ADD_1_7]] = add i32 [[TMP23]], [[SUM_11_1]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 [[INDVARS_IV_1]], 8
 ; CHECK-NEXT:    [[EXITCOND_1_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1_7]], 32
 ; CHECK-NEXT:    br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
@@ -105,43 +98,43 @@
 ; CHECK-NEXT:    [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ], [ [[ADD_1_7]], [[FOR_BODY4_1]] ]
 ; CHECK-NEXT:    [[IDX_NEG_2:%.*]] = sub nsw i64 0, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP16]], 3
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP24]], 3
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[MUL_2]], [[SUM_11_2]]
 ; CHECK-NEXT:    [[IDX_NEG_2_1:%.*]] = xor i64 [[INDVARS_IV_2]], -1
 ; CHECK-NEXT:    [[ADD_PTR_2_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_1]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_1:%.*]] = mul i32 [[TMP17]], 3
+; CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_1:%.*]] = mul i32 [[TMP25]], 3
 ; CHECK-NEXT:    [[ADD_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD_2]]
 ; CHECK-NEXT:    [[IDX_NEG_2_2:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_2]]
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_2:%.*]] = mul i32 [[TMP18]], 3
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_2:%.*]] = mul i32 [[TMP26]], 3
 ; CHECK-NEXT:    [[ADD_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD_2_1]]
 ; CHECK-NEXT:    [[IDX_NEG_2_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_3]]
-; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_3:%.*]] = mul i32 [[TMP19]], 3
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_3:%.*]] = mul i32 [[TMP27]], 3
 ; CHECK-NEXT:    [[ADD_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD_2_2]]
 ; CHECK-NEXT:    [[IDX_NEG_2_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_4]]
-; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_4:%.*]] = mul i32 [[TMP20]], 3
+; CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_4:%.*]] = mul i32 [[TMP28]], 3
 ; CHECK-NEXT:    [[ADD_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD_2_3]]
 ; CHECK-NEXT:    [[IDX_NEG_2_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_5]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_5:%.*]] = mul i32 [[TMP21]], 3
+; CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_5:%.*]] = mul i32 [[TMP29]], 3
 ; CHECK-NEXT:    [[ADD_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD_2_4]]
 ; CHECK-NEXT:    [[IDX_NEG_2_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_6]]
-; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_6:%.*]] = mul i32 [[TMP22]], 3
+; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_6:%.*]] = mul i32 [[TMP30]], 3
 ; CHECK-NEXT:    [[ADD_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD_2_5]]
 ; CHECK-NEXT:    [[IDX_NEG_2_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_7]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_7:%.*]] = mul i32 [[TMP23]], 3
+; CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_7:%.*]] = mul i32 [[TMP31]], 3
 ; CHECK-NEXT:    [[ADD_2_7]] = add i32 [[MUL_2_7]], [[ADD_2_6]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2_7]] = add nuw nsw i64 [[INDVARS_IV_2]], 8
 ; CHECK-NEXT:    [[EXITCOND_2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2_7]], 32