diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -450,10 +450,11 @@
   Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS,
                                         Value *RHS);

-  // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
-  //    -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
-  // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
-  //    -> (BinOp (logic_shift (BinOp X, Y)), Mask)
+  // Fold consecutive sequences of binops that share a common logic shift as
+  // their base. If all the binops are the same, this works with non-constant
+  // operands. Otherwise it only applies if the shift amount and all binop
+  // operands are constant.
+  // Applicable binops are: 'and', 'xor', 'or', and 'add'.
   Instruction *foldBinOpShiftWithShift(BinaryOperator &I);

   /// This tries to simplify binary operations by factorizing out common terms
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -730,21 +730,24 @@
   return RetVal;
 }

-// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
+// (OuterBinOp (BOA_N... (logic_shift X, C), C1), (BOB_N... (logic_shift Y, C)))
 // IFF
 //    1) the logic_shifts match
-//    2) either both binops are bitwise and one is `and` or
-//       BinOp1 is `and`
+//    2) there can be zero binops between OuterBinOp and one of the shifts
+//    IFF there are only two binops total (including OuterBinOp):
+//    3) either both binops are bitwise (or `add` + `shl`) and one is `and`,
+//       or OuterBinOp is `and`
 //       (logic_shift (inv_logic_shift C1, C), C) == C1 or
 //
-//    -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
+//    -> (logic_shift (OuterBinOp (BOA_N... X, inv_logic_shift(C1, C)), Y), C)
 //
-// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
+// (OuterBinOp (BO_N... (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
 // IFF
 //    1) the logic_shifts match
-//    2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
+//    2) OuterBinOp == BinOp_N (all of them). If BinOp == `add`, then `shl` is
+//       also required.
 //
-//    -> (BinOp (logic_shift (BinOp X, Y)), Mask)
+//    -> (BinOp_N... (logic_shift (BinOp X, Y)), Mask)
 Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
   auto IsValidBinOpc = [](unsigned Opc) {
     switch (Opc) {
@@ -760,110 +763,195 @@
     }
   };

+  auto IsValidOperand = [IsValidBinOpc](const Value *V) {
+    if (const Instruction *Ins = dyn_cast<Instruction>(V))
+      return IsValidBinOpc(Ins->getOpcode());
+    return false;
+  };
+
   // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
   // constraints.
-  auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
-                                      unsigned ShOpc) {
-    return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
-           ShOpc == Instruction::Shl;
+  auto IsCompletelyDistributable = [](bool HasAdd, unsigned ShOpc) {
+    return !HasAdd || ShOpc == Instruction::Shl;
   };

   auto GetInvShift = [](unsigned ShOpc) {
     return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
   };

-  auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
-                                 unsigned ShOpc, Constant *CMask,
-                                 Constant *CShift) {
-    // If the BinOp1 is `and` we don't need to check the mask.
-    if (BinOpc1 == Instruction::And)
-      return true;
-
-    // For all other possible transfers we need complete distributable
-    // binop/shift (anything but `add` + `lshr`).
-    if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
-      return false;
+  auto CanDistributeBinops =
+      [&](unsigned OuterBinOpc, bool HasAdd, unsigned ShOpc, Constant *CShAmt,
+          const ArrayRef<SmallVector<Instruction *>> BinOps) {
+        size_t NumInnerOps = BinOps[0].size() + BinOps[1].size();
+        // If the OuterBinOp is `and` and there are only two binops in total,
+        // we don't need to check the mask. TODO: This condition is overly
+        // conservative when there are multiple inner binops.
+        if (OuterBinOpc == Instruction::And && NumInnerOps == 1)
+          return true;
+
+        // For all other possible transfers we need complete distributable
+        // binop/shift (anything but `add` + `lshr`).
+        if (!IsCompletelyDistributable(HasAdd, ShOpc))
+          return false;

-    // If BinOp2 is `and`, any mask works (this only really helps for non-splat
-    // vecs, otherwise the mask will be simplified and the following check will
-    // handle it).
-    if (BinOpc2 == Instruction::And)
-      return true;
+        for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+          bool HasAnd = false;
+          for (auto *Ins : BinOps[OpIdx]) {
+            // Applies if we have exactly one inner binop: if we have an inner
+            // `and`, any mask works (this only really helps for non-splat
+            // vecs, otherwise the mask will be simplified and the following
+            // check will handle it).
+            HasAnd |= Ins->getOpcode() == Instruction::And;
+            Constant *C;
+            // The transform only makes sense if we can constant-evaluate the
+            // shifted mask.
+            if (!match(Ins->getOperand(1), m_ImmConstant(C)))
+              return false;

-    // Otherwise, need mask that meets the below requirement.
-    // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
-    return ConstantExpr::get(
-               ShOpc, ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift),
-               CShift) == CMask;
-  };
+            if (!HasAnd || NumInnerOps != 1)
+              // Otherwise, need mask that meets the below requirement.
+              // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
+              if (ConstantExpr::get(
+                      ShOpc, ConstantExpr::get(GetInvShift(ShOpc), C, CShAmt),
+                      CShAmt) != C)
+                return false;
+          }
+        }
+        return true;
+      };
+
+  Constant *CShAmt;
+  SmallVector<Instruction *> BinOps[2];
+  Instruction *Shifts[2] = {nullptr, nullptr};
+  unsigned ShOpc, OuterBinOpc = I.getOpcode();
+  Value *ShAmt;
+  bool HasAdd = OuterBinOpc == Instruction::Add;
+  bool AllSame = true;
+  for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+    Instruction *Last = nullptr;
+    Instruction *Cur = dyn_cast<Instruction>(I.getOperand(OpIdx));
+
+    while (true) {
+      // If Cur is not an instruction we can never transform (if it's a
+      // ConstantExpr, we will retry later after it has been evaluated).
+      if (!Cur)
+        return nullptr;

-  auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
-    Constant *CMask, *CShift;
-    Value *X, *Y, *ShiftedX, *Mask, *Shift;
-    if (!match(I.getOperand(ShOpnum),
-               m_OneUse(m_LogicalShift(m_Value(Y), m_Value(Shift)))))
-      return nullptr;
-    if (!match(I.getOperand(1 - ShOpnum),
-               m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
-      return nullptr;
+      // If Cur has multiple uses this transform will *probably* increase
+      // instruction count.
+      if (!Cur->hasOneUse())
+        return nullptr;

-    if (!match(ShiftedX,
-               m_OneUse(m_LogicalShift(m_Value(X), m_Specific(Shift)))))
-      return nullptr;
+      // Ends when we find a value that isn't 'and', 'xor', 'or', or 'add'.
+      if (!IsValidOperand(Cur))
+        break;

-    // Make sure we are matching instruction shifts and not ConstantExpr
-    auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
-    auto *IX = dyn_cast<Instruction>(ShiftedX);
-    if (!IY || !IX)
-      return nullptr;
+      BinOps[OpIdx].push_back(Cur);
+      // Track if we find an 'add'. 'add' has additional constraints below.
+      HasAdd |= Cur->getOpcode() == Instruction::Add;
+      AllSame &= Cur->getOpcode() == OuterBinOpc;
+      Last = Cur;
+      // TODO: We could properly gather all non-constant operands to check for
+      // a shift. This is probably overkill, however, and would require extra
+      // logic to prevent exponential explosion. This transform is primarily
+      // targeted at constants, hence we only meaningfully follow operand 0.
+      Cur = dyn_cast<Instruction>(Cur->getOperand(0));
+    }
+    // See if we have a valid shift at the end.
+    for (unsigned LastOpIdx = 0, E = (Last ? 2 : 1); LastOpIdx < E;
+         ++LastOpIdx) {
+      Value *A, *B;
+      Cur = Last ? dyn_cast<Instruction>(Last->getOperand(LastOpIdx)) : Cur;
+      if (!Cur || !match(Cur, m_LogicalShift(m_Value(A), m_Value(B))))
+        continue;

-    // LHS and RHS need same shift opcode
-    unsigned ShOpc = IY->getOpcode();
-    if (ShOpc != IX->getOpcode())
+      if (OpIdx == 0) {
+        ShOpc = cast<Instruction>(Cur)->getOpcode();
+        ShAmt = B;
+      } else {
+        // If shift opcodes don't match we can't transform.
+        if (ShOpc != cast<Instruction>(Cur)->getOpcode())
+          continue;
+        // If the shift amounts don't match we can't transform.
+        if (B != ShAmt)
+          continue;
+      }
+      Shifts[OpIdx] = Cur;
+      break;
+    }
+    // We didn't find a shift.
+    if (Shifts[OpIdx] == nullptr)
       return nullptr;
+  }

-    // Make sure binop is real instruction and not ConstantExpr
-    auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
-    if (!BO2)
-      return nullptr;
+  // If all the binops are the same and it's bitwise, or shl with add, then
+  // just distribute to drop the shift regardless of the constants.
+  if (AllSame && IsCompletelyDistributable(HasAdd, ShOpc)) {
+    Instruction::BinaryOps BOpc =
+        static_cast<Instruction::BinaryOps>(OuterBinOpc);
+    Value *BaseBinOp = Builder.CreateBinOp(BOpc, Shifts[0]->getOperand(0),
+                                           Shifts[1]->getOperand(0));
+    Value *AggregatedBinOp = nullptr;
+    auto NextBinOp = [&](Value *Prev, Value *V) {
+      if (Prev == nullptr)
+        return V;
+      return Builder.CreateBinOp(BOpc, Prev, V);
+    };
+    for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+      if (!BinOps[OpIdx].empty()) {
+        // We could combine all the constants here and reschedule the binop for
+        // better ILP, but that really belongs in another fold/pass.
+        for (unsigned BinOpIdx = 0; BinOpIdx + 1 < BinOps[OpIdx].size();
+             ++BinOpIdx)
+          AggregatedBinOp = NextBinOp(AggregatedBinOp,
+                                      BinOps[OpIdx][BinOpIdx]->getOperand(1));
+        Value *Last = BinOps[OpIdx].back()->getOperand(0);
+        if (Last == Shifts[OpIdx])
+          Last = BinOps[OpIdx].back()->getOperand(1);
+        AggregatedBinOp = NextBinOp(AggregatedBinOp, Last);
+      }
+    }

-    unsigned BinOpc = BO2->getOpcode();
-    // Make sure we have valid binops.
-    if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
-      return nullptr;
+    Value *FinalLhs = BaseBinOp, *FinalRhs = ShAmt;
+    Instruction::BinaryOps FinalBOpc =
+        static_cast<Instruction::BinaryOps>(ShOpc);

-    // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
-    // distribute to drop the shift irrelevant of constants.
-    if (BinOpc == I.getOpcode() &&
-        IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
-      Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
-      Value *NewBinOp1 = Builder.CreateBinOp(
-          static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
-      return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
+    if (AggregatedBinOp) {
+      FinalLhs = Builder.CreateBinOp(FinalBOpc, FinalLhs, FinalRhs);
+      FinalRhs = AggregatedBinOp;
+      FinalBOpc = BOpc;
     }

-    // Otherwise we can only distribute by constant shifting the mask, so
-    // ensure we have constants.
-    if (!match(Shift, m_ImmConstant(CShift)))
-      return nullptr;
-    if (!match(Mask, m_ImmConstant(CMask)))
-      return nullptr;
+    return BinaryOperator::Create(FinalBOpc, FinalLhs, FinalRhs);
+  }

-    // Check if we can distribute the binops.
-    if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
-      return nullptr;
+  // Otherwise we can only distribute by constant shifting the mask, so
+  // ensure we have constants.
+  if (!match(ShAmt, m_ImmConstant(CShAmt)))
+    return nullptr;
+  // Check if we can distribute the binops.
+  if (!CanDistributeBinops(OuterBinOpc, HasAdd, ShOpc, CShAmt, BinOps))
+    return nullptr;

-    Constant *NewCMask = ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift);
-    Value *NewBinOp2 = Builder.CreateBinOp(
-        static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
-    Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
-    return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
-                                  NewBinOp1, CShift);
-  };
+  // Create new binop chain.
+  Value *NewBinOps[2];
+  for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+    NewBinOps[OpIdx] = Shifts[OpIdx]->getOperand(0);
+    // Reverse order; we can't reorder operations across distinct opcodes.
+    for (auto *Ins : reverse(BinOps[OpIdx])) {
+      Constant *NewCMask = ConstantExpr::get(
+          GetInvShift(ShOpc), cast<Constant>(Ins->getOperand(1)), CShAmt);
+      NewBinOps[OpIdx] = Builder.CreateBinOp(
+          static_cast<Instruction::BinaryOps>(Ins->getOpcode()),
+          NewBinOps[OpIdx], NewCMask);
+    }
+  }

-  if (Instruction *R = MatchBinOp(0))
-    return R;
-  return MatchBinOp(1);
+  Value *NewOuterBinOp =
+      Builder.CreateBinOp(static_cast<Instruction::BinaryOps>(OuterBinOpc),
+                          NewBinOps[0], NewBinOps[1]);
+  return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
+                                NewOuterBinOp, CShAmt);
 }

 Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll
--- a/llvm/test/Transforms/InstCombine/and-xor-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll
@@ -356,7 +356,7 @@
 define i8 @and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: define {{[^@]+}}@and_shl
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y]], [[X]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], [[SHAMT]]
 ; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT: ret i8 [[R]]
@@ -387,10 +386,9 @@
 ; CHECK-LABEL: define {{[^@]+}}@xor_shl
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[ZARG:%.*]], i8 [[SHAMT:%.*]]) {
 ; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG]]
-; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X]], [[SHAMT]]
-; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = xor i8 [[Z]], [[SX]]
-; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], [[SHAMT]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
@@ -405,10 +404,9 @@
 ; CHECK-LABEL: define {{[^@]+}}@and_lshr
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[ZARG:%.*]], i8 [[SHAMT:%.*]]) {
 ; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG]]
-; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]]
-; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = and i8 [[Z]], [[SX]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[SY]], [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
@@ -422,7 +420,7 @@
 define i8 @or_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: define {{[^@]+}}@or_lshr
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Y]], [[X]]
 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]]
 ; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT: ret i8 [[R]]
@@ -561,10 +559,9 @@
 ; CHECK-LABEL: define {{[^@]+}}@xor_lshr_multiuse
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
 ; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]]
+; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]]
 ; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z]]
-; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], [[Y]]
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]]
-; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP2]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
 ; CHECK-NEXT: [[R2:%.*]] = sdiv i8 [[A]], [[R]]
 ; CHECK-NEXT: ret i8 [[R2]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
--- a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
@@ -3,7 +3,7 @@

 define i8 @shl_and_and(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_and_and(
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], 4
 ; CHECK-NEXT: [[BW1:%.*]] = and i8 [[TMP2]], 80
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -32,7 +32,7 @@

 define i8 @shl_add_add(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_add_add(
-; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], 2
 ; CHECK-NEXT: [[BW1:%.*]] = add i8 [[TMP2]], 48
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -162,7 +162,7 @@

 define i8 @lshr_or_or_fail(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_or_or_fail(
-; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 5
 ; CHECK-NEXT: [[BW1:%.*]] = or i8 [[TMP2]], -58
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -205,7 +205,7 @@

 define i8 @lshr_or_or_no_const(i8 %x, i8 %y, i8 %sh, i8 %mask) {
 ; CHECK-LABEL: @lshr_or_or_no_const(
-; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SH:%.*]]
 ; CHECK-NEXT: [[BW1:%.*]] = or i8 [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -234,7 +234,7 @@

 define i8 @shl_xor_xor_no_const(i8 %x, i8 %y, i8 %sh, i8 %mask) {
 ; CHECK-LABEL: @shl_xor_xor_no_const(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], [[SH:%.*]]
 ; CHECK-NEXT: [[BW1:%.*]] = xor i8 [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -263,7 +263,7 @@

 define <2 x i8> @shl_and_and_no_const(<2 x i8> %x, <2 x i8> %y, <2 x i8> %sh, <2 x i8> %mask) {
 ; CHECK-LABEL: @shl_and_and_no_const(
-; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], [[SH:%.*]]
 ; CHECK-NEXT: [[BW1:%.*]] = and <2 x i8> [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT: ret <2 x i8> [[BW1]]
@@ -277,7 +277,7 @@

 define i8 @shl_add_add_no_const(i8 %x, i8 %y, i8 %sh, i8 %mask) {
 ; CHECK-LABEL: @shl_add_add_no_const(
-; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], [[SH:%.*]]
 ; CHECK-NEXT: [[BW1:%.*]] = add i8 [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -335,7 +335,7 @@

 define <2 x i8> @shl_or_or_good_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_or_or_good_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]],
 ; CHECK-NEXT: [[BW1:%.*]] = or <2 x i8> [[TMP2]],
 ; CHECK-NEXT: ret <2 x i8> [[BW1]]
@@ -349,7 +349,7 @@

 define <2 x i8> @shl_or_or_fail_bad_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_or_or_fail_bad_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]],
 ; CHECK-NEXT: [[BW1:%.*]] = or <2 x i8> [[TMP2]],
 ; CHECK-NEXT: ret <2 x i8> [[BW1]]
@@ -363,7 +363,7 @@

 define i8 @lshr_xor_or_good_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_xor_or_good_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 4
 ; CHECK-NEXT: [[BW1:%.*]] = or i8 [[TMP2]], 48
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -421,7 +421,7 @@

 define i8 @shl_xor_xor_good_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_xor_xor_good_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], 1
 ; CHECK-NEXT: [[BW1:%.*]] = xor i8 [[TMP2]], 88
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -435,7 +435,7 @@

 define i8 @shl_xor_xor_bad_mask_distribute(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_xor_xor_bad_mask_distribute(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], 1
 ; CHECK-NEXT: [[BW1:%.*]] = xor i8 [[TMP2]], -68
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -553,19 +553,18 @@

 define i8 @lshr_ors_x9_noconsts(i8 %amt, i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %z2, i8 %z3, i8 %z4, i8 %z5, i8 %z6, i8 %z7, i8 %z8, i8 %z9) {
 ; CHECK-LABEL: @lshr_ors_x9_noconsts(
-; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[AMT:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[AMT]]
-; CHECK-NEXT: [[X0:%.*]] = or i8 [[SX]], [[Z0:%.*]]
-; CHECK-NEXT: [[X1:%.*]] = or i8 [[X0]], [[Z1:%.*]]
-; CHECK-NEXT: [[X2:%.*]] = or i8 [[X1]], [[Z2:%.*]]
-; CHECK-NEXT: [[X3:%.*]] = or i8 [[X2]], [[Z3:%.*]]
-; CHECK-NEXT: [[X4:%.*]] = or i8 [[X3]], [[Z4:%.*]]
-; CHECK-NEXT: [[X5:%.*]] = or i8 [[X4]], [[Z5:%.*]]
-; CHECK-NEXT: [[X6:%.*]] = or i8 [[X5]], [[Z6:%.*]]
-; CHECK-NEXT: [[X7:%.*]] = or i8 [[X6]], [[Z7:%.*]]
-; CHECK-NEXT: [[X8:%.*]] = or i8 [[X7]], [[Z8:%.*]]
-; CHECK-NEXT: [[X9:%.*]] = or i8 [[X8]], [[Z9:%.*]]
-; CHECK-NEXT: [[R:%.*]] = or i8 [[SY]], [[X9]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[Z9:%.*]], [[Z8:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[Z7:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i8 [[TMP3]], [[Z6:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = or i8 [[TMP4]], [[Z5:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = or i8 [[TMP5]], [[Z4:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP6]], [[Z3:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP7]], [[Z2:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i8 [[TMP8]], [[Z1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[TMP9]], [[Z0:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = lshr i8 [[TMP1]], [[AMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %amt
@@ -586,19 +585,18 @@

 define i8 @shl_xors_x8_noconsts(i8 %amt, i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %z2, i8 %z3, i8 %z4, i8 %z5, i8 %z6, i8 %z7, i8 %z8, i8 %z9) {
 ; CHECK-LABEL: @shl_xors_x8_noconsts(
-; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[AMT:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[AMT]]
-; CHECK-NEXT: [[X0:%.*]] = xor i8 [[SX]], [[Z0:%.*]]
-; CHECK-NEXT: [[X1:%.*]] = xor i8 [[X0]], [[Z1:%.*]]
-; CHECK-NEXT: [[X2:%.*]] = xor i8 [[X1]], [[Z2:%.*]]
-; CHECK-NEXT: [[X3:%.*]] = xor i8 [[X2]], [[Z3:%.*]]
-; CHECK-NEXT: [[X4:%.*]] = xor i8 [[X3]], [[Z4:%.*]]
-; CHECK-NEXT: [[X5:%.*]] = xor i8 [[X4]], [[Z5:%.*]]
-; CHECK-NEXT: [[X6:%.*]] = xor i8 [[X5]], [[Z6:%.*]]
-; CHECK-NEXT: [[X7:%.*]] = xor i8 [[X6]], [[Z7:%.*]]
-; CHECK-NEXT: [[X8:%.*]] = xor i8 [[X7]], [[Z8:%.*]]
-; CHECK-NEXT: [[Y9:%.*]] = xor i8 [[SY]], [[Z9:%.*]]
-; CHECK-NEXT: [[R:%.*]] = xor i8 [[Y9]], [[X8]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[Z9:%.*]], [[Z8:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[TMP2]], [[Z7:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], [[Z6:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = xor i8 [[TMP4]], [[Z5:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = xor i8 [[TMP5]], [[Z4:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = xor i8 [[TMP6]], [[Z3:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor i8 [[TMP7]], [[Z2:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor i8 [[TMP8]], [[Z1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = xor i8 [[TMP9]], [[Z0:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = shl i8 [[TMP1]], [[AMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = shl i8 %x, %amt
@@ -619,19 +617,18 @@

 define i8 @lshr_ands_x7_noconsts(i8 %amt, i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %z2, i8 %z3, i8 %z4, i8 %z5, i8 %z6, i8 %z7, i8 %z8, i8 %z9) {
 ; CHECK-LABEL: @lshr_ands_x7_noconsts(
-; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[AMT:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[AMT]]
-; CHECK-NEXT: [[X0:%.*]] = and i8 [[SX]], [[Z0:%.*]]
-; CHECK-NEXT: [[X1:%.*]] = and i8 [[X0]], [[Z1:%.*]]
-; CHECK-NEXT: [[X2:%.*]] = and i8 [[X1]], [[Z2:%.*]]
-; CHECK-NEXT: [[X3:%.*]] = and i8 [[X2]], [[Z3:%.*]]
-; CHECK-NEXT: [[X4:%.*]] = and i8 [[X3]], [[Z4:%.*]]
-; CHECK-NEXT: [[X5:%.*]] = and i8 [[X4]], [[Z5:%.*]]
-; CHECK-NEXT: [[X6:%.*]] = and i8 [[X5]], [[Z6:%.*]]
-; CHECK-NEXT: [[X7:%.*]] = and i8 [[X6]], [[Z7:%.*]]
-; CHECK-NEXT: [[Y8:%.*]] = and i8 [[SY]], [[Z8:%.*]]
-; CHECK-NEXT: [[Y9:%.*]] = and i8 [[Y8]], [[Z9:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[X7]], [[Y9]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[Z7:%.*]], [[Z6:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], [[Z5:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], [[Z4:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[TMP4]], [[Z3:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = and i8 [[TMP5]], [[Z2:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], [[Z1:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[TMP7]], [[Z0:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = and i8 [[TMP8]], [[Z9:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = and i8 [[TMP9]], [[Z8:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = lshr i8 [[TMP1]], [[AMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %amt
@@ -652,19 +649,18 @@

 define i8 @shl_adds_x6_noconsts(i8 %amt, i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %z2, i8 %z3, i8 %z4, i8 %z5, i8 %z6, i8 %z7, i8 %z8, i8 %z9) {
 ; CHECK-LABEL: @shl_adds_x6_noconsts(
-; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[AMT:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[AMT]]
-; CHECK-NEXT: [[X0:%.*]] = add i8 [[SX]], [[Z0:%.*]]
-; CHECK-NEXT: [[X1:%.*]] = add i8 [[X0]], [[Z1:%.*]]
-; CHECK-NEXT: [[X2:%.*]] = add i8 [[X1]], [[Z2:%.*]]
-; CHECK-NEXT: [[X3:%.*]] = add i8 [[X2]], [[Z3:%.*]]
-; CHECK-NEXT: [[X4:%.*]] = add i8 [[X3]], [[Z4:%.*]]
-; CHECK-NEXT: [[X5:%.*]] = add i8 [[X4]], [[Z5:%.*]]
-; CHECK-NEXT: [[X6:%.*]] = add i8 [[X5]], [[Z6:%.*]]
-; CHECK-NEXT: [[Y7:%.*]] = add i8 [[SY]], [[Z7:%.*]]
-; CHECK-NEXT: [[Y8:%.*]] = add i8 [[Y7]], [[Z8:%.*]]
-; CHECK-NEXT: [[Y9:%.*]] = add i8 [[Y8]], [[Z9:%.*]]
-; CHECK-NEXT: [[R:%.*]] = add i8 [[X6]], [[Y9]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Z6:%.*]], [[Z5:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], [[Z4:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[TMP3]], [[Z3:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[TMP4]], [[Z2:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], [[Z1:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[TMP6]], [[Z0:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP7]], [[Z9:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], [[Z8:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[TMP9]], [[Z7:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = shl i8 [[TMP1]], [[AMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = shl i8 %x, %amt
@@ -685,17 +681,16 @@

 define i8 @shl_adds_x5(i8 %amt, i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %z2, i8 %z3, i8 %z4, i8 %z5, i8 %z6, i8 %z7, i8 %z8, i8 %z9) {
 ; CHECK-LABEL: @shl_adds_x5(
-; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[AMT:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[AMT]]
-; CHECK-NEXT: [[X0:%.*]] = add i8 [[SX]], [[Z0:%.*]]
-; CHECK-NEXT: [[X1:%.*]] = add i8 [[X0]], 88
-; CHECK-NEXT: [[X2:%.*]] = add i8 [[X1]], [[Z2:%.*]]
-; CHECK-NEXT: [[X4:%.*]] = add i8 [[X2]], -46
-; CHECK-NEXT: [[X5:%.*]] = add i8 [[X4]], [[Z5:%.*]]
-; CHECK-NEXT: [[Y7:%.*]] = add i8 [[SY]], -23
-; CHECK-NEXT: [[Y8:%.*]] = add i8 [[Y7]], [[Z8:%.*]]
-; CHECK-NEXT: [[Y9:%.*]] = add i8 [[Y8]], 22
-; CHECK-NEXT: [[R:%.*]] = add i8 [[X5]], [[Y9]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Z5:%.*]], -46
+; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], [[Z2:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[TMP3]], 88
+; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[TMP4]], [[Z0:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], 22
+; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[TMP6]], [[Z8:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP7]], -23
+; CHECK-NEXT: [[TMP9:%.*]] = shl i8 [[TMP1]], [[AMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP9]], [[TMP8]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = shl i8 %x, %amt
@@ -803,11 +798,10 @@

 define <2 x i8> @shl_xor_add_and(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_xor_add_and(
-; CHECK-NEXT: [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]],
-; CHECK-NEXT: [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]],
-; CHECK-NEXT: [[BW3:%.*]] = xor <2 x i8> [[SHIFT1]],
-; CHECK-NEXT: [[BW2:%.*]] = add <2 x i8> [[BW3]],
-; CHECK-NEXT: [[BW1:%.*]] = and <2 x i8> [[SHIFT2]], [[BW2]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]],
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]],
+; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i8> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: [[BW1:%.*]] = shl <2 x i8> [[TMP3]],
 ; CHECK-NEXT: ret <2 x i8> [[BW1]]
 ;
   %shift1 = shl <2 x i8> %x,
@@ -821,12 +815,11 @@

 define <2 x i8> @shl_xor_or_add_xor(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_xor_or_add_xor(
-; CHECK-NEXT: [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]],
-; CHECK-NEXT: [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]],
-; CHECK-NEXT: [[BW3:%.*]] = xor <2 x i8> [[SHIFT1]],
-; CHECK-NEXT: [[BW2:%.*]] = or <2 x i8> [[BW3]],
-; CHECK-NEXT: [[BW1:%.*]] = add <2 x i8> [[SHIFT2]],
-; CHECK-NEXT: [[BW0:%.*]] = xor <2 x i8> [[BW1]], [[BW2]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]],
+; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i8> [[X:%.*]],
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i8> [[TMP2]],
+; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i8> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[BW0:%.*]] = shl <2 x i8> [[TMP4]],
 ; CHECK-NEXT: ret <2 x i8> [[BW0]]
 ;
   %shift1 = shl <2 x i8> %x,
@@ -860,12 +853,11 @@

 define <2 x i8> @shl_xor_or_add_and(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_xor_or_add_and(
-; CHECK-NEXT: [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]],
-; CHECK-NEXT: [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]],
-; CHECK-NEXT: [[BW3:%.*]] = xor <2 x i8> [[SHIFT1]],
-; CHECK-NEXT: [[BW2:%.*]] = or <2 x i8> [[BW3]],
-; CHECK-NEXT: [[BW1:%.*]] = add <2 x i8> [[SHIFT2]],
-; CHECK-NEXT: [[BW0:%.*]] = and <2 x i8> [[BW1]], [[BW2]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]],
+; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i8> [[X:%.*]],
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i8> [[TMP2]],
+; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i8> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[BW0:%.*]] = shl <2 x i8> [[TMP4]],
 ; CHECK-NEXT: ret <2 x i8> [[BW0]]
 ;
   %shift1 = shl <2 x i8> %x,
diff --git a/llvm/test/Transforms/InstCombine/or-shifted-masks.ll b/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
--- a/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
+++ b/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
@@ -126,15 +126,12 @@
 ; CHECK-NEXT: [[I1:%.*]] = icmp sgt i32 [[X:%.*]], -1
 ; CHECK-NEXT: br i1 [[I1]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK: if:
-; CHECK-NEXT: [[I:%.*]] = lshr i32 [[X]], 22
-; CHECK-NEXT: [[I2:%.*]] = and i32 [[I]], 24
-; CHECK-NEXT: [[I3:%.*]] = lshr i32 [[X]], 22
-; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 480
-; CHECK-NEXT: [[I5:%.*]] = or i32 [[I4]], [[I2]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 22
+; CHECK-NEXT: [[I5:%.*]] = and i32 [[TMP1]], 504
 ; CHECK-NEXT: br label [[END:%.*]]
 ; CHECK: else:
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 17
-; CHECK-NEXT: [[I9:%.*]] = and i32 [[TMP1]], 16128
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[X]], 17
+; CHECK-NEXT: [[I9:%.*]] = and i32 [[TMP2]], 16128
 ; CHECK-NEXT: br label [[END]]
 ; CHECK: end:
 ; CHECK-NEXT: [[I10:%.*]] = phi i32 [ [[I5]], [[IF]] ], [ [[I9]], [[ELSE]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
--- a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
+++ b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
@@ -63,31 +63,31 @@
 ; CHECK-NEXT: [[IDX_NEG_1_1:%.*]] = xor i64 [[INDVARS_IV_1]], -1
 ; CHECK-NEXT: [[ADD_PTR_1_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_1]]
 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP8]]
 ; CHECK-NEXT: [[IDX_NEG_1_2:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_2]]
 ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: [[IDX_NEG_1_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_3]]
 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP12]]
 ; CHECK-NEXT: [[IDX_NEG_1_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_4]]
 ; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], [[TMP14]]
 ; CHECK-NEXT: [[IDX_NEG_1_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_5]]
 ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], [[TMP16]]
 ; CHECK-NEXT: [[IDX_NEG_1_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_6]]
 ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[TMP18]]
 ; CHECK-NEXT: [[IDX_NEG_1_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_7]]
 ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], [[TMP20]]
 ; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 1
 ; CHECK-NEXT: [[ADD_1_7]] = add i32 [[TMP23]], [[SUM_11_1]]
 ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 [[INDVARS_IV_1]], 8
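
For reference, a minimal before/after IR sketch of the generalized fold, in the all-same-binop (non-constant operand) case. It mirrors the lshr_ors_x9_noconsts test above; the function and value names here are illustrative only, not taken from the test suite.

define i8 @chained_or_example(i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %amt) {
  ; Before: two lshr's by the same amount feed a chain of 'or's.
  %sx = lshr i8 %x, %amt
  %sy = lshr i8 %y, %amt
  %a = or i8 %sx, %z0
  %b = or i8 %a, %z1
  %r = or i8 %b, %sy
  ret i8 %r
}

; After the fold only one shift remains: because all the binops are the same
; ('or') and lshr distributes over it, %x and %y are combined before the
; shift and the unshifted operands are re-aggregated separately:
;   %xy = or i8 %y, %x
;   %zz = or i8 %z1, %z0
;   %sh = lshr i8 %xy, %amt
;   %r  = or i8 %sh, %zz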