diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1383,6 +1383,9 @@ if (Instruction *X = foldNoWrapAdd(I, Builder)) return X; + if (Instruction *R = foldBinOpShiftWithShift(I)) + return R; + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); Type *Ty = I.getType(); if (Ty->isIntOrIntVectorTy(1)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2155,6 +2155,9 @@ if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); + if (Instruction *R = foldBinOpShiftWithShift(I)) + return R; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Value *X, *Y; @@ -3202,6 +3205,9 @@ if (Instruction *Concat = matchOrConcat(I, Builder)) return replaceInstUsesWith(I, Concat); + if (Instruction *R = foldBinOpShiftWithShift(I)) + return R; + Value *X, *Y; const APInt *CV; if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) && @@ -4275,6 +4281,9 @@ if (Instruction *R = foldNot(I)) return R; + if (Instruction *R = foldBinOpShiftWithShift(I)) + return R; + // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M) // This it a special case in haveNoCommonBitsSet, but the computeKnownBits // calls in there are unnecessary as SimplifyDemandedInstructionBits should diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -450,6 +450,23 @@ Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS); + // (Binop1 (Binop2 (logic_shift X, C), C1), 
(logic_shift Y, C)) + // IFF + // 1) the logic_shifts match + // 2) BinOp1 is `and`; or, provided no `add` is used with `lshr`, + // either BinOp2 is `and` or + // (logic_shift (inv_logic_shift C1, C), C) == C1 + // + // -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C) + // + // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt)) + // IFF + // 1) the logic_shifts match + // 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`). + // + // -> (BinOp (logic_shift (BinOp X, Y), Amt), Mask) + Instruction *foldBinOpShiftWithShift(BinaryOperator &I); + /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). Value *tryFactorizationFolds(BinaryOperator &I); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -730,6 +730,121 @@ return RetVal; } +Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { + auto IsValidBinOpc = [](unsigned Opc) { + switch (Opc) { + default: + return false; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Add: + // Skip Sub as we only match constant masks which will canonicalize to use + // add. + return true; + } + }; + + // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra + // constraints. 
+ auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2, + unsigned ShOpc) { + return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) || + ShOpc == Instruction::Shl; + }; + + auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * { + Constant *CMask, *CShift; + Value *X, *Y, *ShiftedX, *Mask, *Shift; + if (!match(I.getOperand(ShOpnum), + m_OneUse(m_LogicalShift(m_Value(Y), m_Value(Shift))))) + return nullptr; + if (!match(I.getOperand(1 - ShOpnum), + m_BinOp(m_Value(ShiftedX), m_Value(Mask)))) + return nullptr; + + if (!match(ShiftedX, + m_OneUse(m_LogicalShift(m_Value(X), m_Specific(Shift))))) + return nullptr; + + // Make sure we are matching instruction shifts and not ConstantExpr + auto *IY = dyn_cast(I.getOperand(ShOpnum)); + auto *IX = dyn_cast(ShiftedX); + if (!IY || !IX) + return nullptr; + + // LHS and RHS need the same shift opcode (both `shl` or both `lshr`) + unsigned ShOpc = IY->getOpcode(); + if (ShOpc != IX->getOpcode()) + return nullptr; + + // Make sure binop is a real instruction and not a ConstantExpr + auto *BO2 = dyn_cast(I.getOperand(1 - ShOpnum)); + if (!BO2) + return nullptr; + + unsigned BinOpc = BO2->getOpcode(); + // Make sure we have valid binops. + if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc)) + return nullptr; + + // If BinOp1 == BinOp2 and it's bitwise, or it's `add` with `shl`, then just + // distribute to drop one shift; neither Mask nor Shift must be constant. + if (BinOpc == I.getOpcode() && + IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) { + Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y); + Value *NewBinOp1 = Builder.CreateBinOp( + static_cast(ShOpc), NewBinOp2, Shift); + return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask); + } + + // Otherwise we can only distribute by constant shifting the mask, so + // ensure we have constants. + if (!match(Shift, m_ImmConstant(CShift))) + return nullptr; + if (!match(Mask, m_ImmConstant(CMask))) + return nullptr; + + // If the BinOp1 is `and` we don't need to check the mask. 
+ unsigned InvShOpc = + ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr; + if (I.getOpcode() == Instruction::And) { + // Pass + } + // For all other possible transforms we need a completely distributable + // binop/shift (anything but `add` + `lshr`). + else if (!IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) { + return nullptr; + } + // If BinOp2 is `and`, any mask works (this only really helps for non-splat + // vecs, otherwise the mask will be simplified and the following check will + // handle it). + else if (BinOpc == Instruction::And) { + // Pass + } + // Otherwise, need mask that meets the below requirement. + // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask + else if (ConstantExpr::get(ShOpc, + ConstantExpr::get(InvShOpc, CMask, CShift), + CShift) == CMask) { + // Pass + } else { + return nullptr; + } + + Constant *NewCMask = ConstantExpr::get(InvShOpc, CMask, CShift); + Value *NewBinOp2 = Builder.CreateBinOp( + static_cast(BinOpc), X, NewCMask); + Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2); + return BinaryOperator::Create(static_cast(ShOpc), + NewBinOp1, CShift); + }; + + if (Instruction *R = MatchBinOp(0)) + return R; + return MatchBinOp(1); +} + Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast(LHS); diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll --- a/llvm/test/Transforms/InstCombine/and-xor-or.ll +++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll @@ -356,10 +356,9 @@ define i8 @and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) { ; CHECK-LABEL: define {{[^@]+}}@and_shl ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { -; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X]], [[SHAMT]] -; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[Z]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[SY]], 
[[A]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], [[SHAMT]] +; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP2]], [[Z]] ; CHECK-NEXT: ret i8 [[R]] ; %sx = shl i8 %x, %shamt @@ -372,10 +371,9 @@ define i8 @or_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) { ; CHECK-LABEL: define {{[^@]+}}@or_shl ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { -; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X]], [[SHAMT]] -; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z]] -; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], [[SHAMT]] +; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP2]], [[Z]] ; CHECK-NEXT: ret i8 [[R]] ; %sx = shl i8 %x, %shamt @@ -424,10 +422,9 @@ define i8 @or_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) { ; CHECK-LABEL: define {{[^@]+}}@or_lshr ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { -; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]] -; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z]] -; CHECK-NEXT: [[R:%.*]] = or i8 [[SY]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]] +; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP2]], [[Z]] ; CHECK-NEXT: ret i8 [[R]] ; %sx = lshr i8 %x, %shamt @@ -440,10 +437,9 @@ define i8 @xor_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) { ; CHECK-LABEL: define {{[^@]+}}@xor_lshr ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { -; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]] -; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z]] -; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]] +; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP2]], [[Z]] ; CHECK-NEXT: ret i8 [[R]] ; %sx = 
lshr i8 %x, %shamt @@ -565,9 +561,10 @@ ; CHECK-LABEL: define {{[^@]+}}@xor_lshr_multiuse ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { ; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]] -; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]] ; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z]] -; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]] +; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP2]], [[Z]] ; CHECK-NEXT: [[R2:%.*]] = sdiv i8 [[A]], [[R]] ; CHECK-NEXT: ret i8 [[R2]] ; diff --git a/llvm/test/Transforms/InstCombine/or-shifted-masks.ll b/llvm/test/Transforms/InstCombine/or-shifted-masks.ll --- a/llvm/test/Transforms/InstCombine/or-shifted-masks.ll +++ b/llvm/test/Transforms/InstCombine/or-shifted-masks.ll @@ -75,16 +75,10 @@ define i32 @multiuse2(i32 %x) { ; CHECK-LABEL: @multiuse2( -; CHECK-NEXT: [[I:%.*]] = shl i32 [[X:%.*]], 1 -; CHECK-NEXT: [[I2:%.*]] = and i32 [[I]], 12 -; CHECK-NEXT: [[I6:%.*]] = shl i32 [[X]], 8 -; CHECK-NEXT: [[I7:%.*]] = and i32 [[I6]], 24576 -; CHECK-NEXT: [[I14:%.*]] = shl i32 [[X]], 8 -; CHECK-NEXT: [[I9:%.*]] = and i32 [[I14]], 7680 -; CHECK-NEXT: [[I10:%.*]] = or i32 [[I7]], [[I9]] -; CHECK-NEXT: [[I85:%.*]] = shl i32 [[X]], 1 -; CHECK-NEXT: [[I11:%.*]] = and i32 [[I85]], 240 -; CHECK-NEXT: [[I12:%.*]] = or i32 [[I2]], [[I11]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 8 +; CHECK-NEXT: [[I10:%.*]] = and i32 [[TMP1]], 32256 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[X]], 1 +; CHECK-NEXT: [[I12:%.*]] = and i32 [[TMP2]], 252 ; CHECK-NEXT: [[I13:%.*]] = or i32 [[I10]], [[I12]] ; CHECK-NEXT: ret i32 [[I13]] ; @@ -107,15 +101,10 @@ define i32 @multiuse3(i32 %x) { ; CHECK-LABEL: @multiuse3( -; CHECK-NEXT: [[I:%.*]] = and i32 [[X:%.*]], 96 -; CHECK-NEXT: [[I1:%.*]] = shl nuw nsw i32 [[I]], 6 -; CHECK-NEXT: [[I2:%.*]] = lshr exact i32 [[I]], 1 -; CHECK-NEXT: [[I3:%.*]] = shl i32 [[X]], 6 -; CHECK-NEXT: [[I4:%.*]] = 
and i32 [[I3]], 1920 -; CHECK-NEXT: [[I5:%.*]] = or i32 [[I1]], [[I4]] -; CHECK-NEXT: [[I6:%.*]] = lshr i32 [[X]], 1 -; CHECK-NEXT: [[I7:%.*]] = and i32 [[I6]], 15 -; CHECK-NEXT: [[I8:%.*]] = or i32 [[I2]], [[I7]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 6 +; CHECK-NEXT: [[I5:%.*]] = and i32 [[TMP1]], 8064 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[X]], 1 +; CHECK-NEXT: [[I8:%.*]] = and i32 [[TMP2]], 63 ; CHECK-NEXT: [[I9:%.*]] = or i32 [[I8]], [[I5]] ; CHECK-NEXT: ret i32 [[I9]] ; @@ -134,20 +123,18 @@ define i32 @multiuse4(i32 %x) local_unnamed_addr { ; CHECK-LABEL: @multiuse4( -; CHECK-NEXT: [[I:%.*]] = and i32 [[X:%.*]], 100663296 -; CHECK-NEXT: [[I1:%.*]] = icmp sgt i32 [[X]], -1 +; CHECK-NEXT: [[I1:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: br i1 [[I1]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: -; CHECK-NEXT: [[I2:%.*]] = lshr exact i32 [[I]], 22 +; CHECK-NEXT: [[I:%.*]] = lshr i32 [[X]], 22 +; CHECK-NEXT: [[I2:%.*]] = and i32 [[I]], 24 ; CHECK-NEXT: [[I3:%.*]] = lshr i32 [[X]], 22 ; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 480 ; CHECK-NEXT: [[I5:%.*]] = or i32 [[I4]], [[I2]] ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: -; CHECK-NEXT: [[I6:%.*]] = lshr exact i32 [[I]], 17 -; CHECK-NEXT: [[I7:%.*]] = lshr i32 [[X]], 17 -; CHECK-NEXT: [[I8:%.*]] = and i32 [[I7]], 15360 -; CHECK-NEXT: [[I9:%.*]] = or i32 [[I8]], [[I6]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 17 +; CHECK-NEXT: [[I9:%.*]] = and i32 [[TMP1]], 16128 ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[I10:%.*]] = phi i32 [ [[I5]], [[IF]] ], [ [[I9]], [[ELSE]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll --- a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll +++ b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll @@ -60,43 +60,36 @@ ; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i64 0, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i32, ptr 
getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1]] ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ADD_PTR_1]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_1:%.*]] = shl i32 [[TMP8]], 1 -; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_1]], [[SUM_11_1]] ; CHECK-NEXT: [[IDX_NEG_1_1:%.*]] = xor i64 [[INDVARS_IV_1]], -1 ; CHECK-NEXT: [[ADD_PTR_1_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_1]] ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_1_1:%.*]] = shl i32 [[TMP9]], 1 -; CHECK-NEXT: [[ADD_1_1:%.*]] = add i32 [[MUL_1_1]], [[ADD_1]] +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[IDX_NEG_1_2:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_2]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_1_2:%.*]] = shl i32 [[TMP10]], 1 -; CHECK-NEXT: [[ADD_1_2:%.*]] = add i32 [[MUL_1_2]], [[ADD_1_1]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[IDX_NEG_1_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_3]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_1_3:%.*]] = shl i32 [[TMP11]], 1 -; CHECK-NEXT: [[ADD_1_3:%.*]] = add i32 [[MUL_1_3]], [[ADD_1_2]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[IDX_NEG_1_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_1]] ; CHECK-NEXT: 
[[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_4]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_1_4:%.*]] = shl i32 [[TMP12]], 1 -; CHECK-NEXT: [[ADD_1_4:%.*]] = add i32 [[MUL_1_4]], [[ADD_1_3]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP15]] ; CHECK-NEXT: [[IDX_NEG_1_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_5]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_1_5:%.*]] = shl i32 [[TMP13]], 1 -; CHECK-NEXT: [[ADD_1_5:%.*]] = add i32 [[MUL_1_5]], [[ADD_1_4]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]] ; CHECK-NEXT: [[IDX_NEG_1_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_6]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_1_6:%.*]] = shl i32 [[TMP14]], 1 -; CHECK-NEXT: [[ADD_1_6:%.*]] = add i32 [[MUL_1_6]], [[ADD_1_5]] +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP18]], [[TMP19]] ; CHECK-NEXT: [[IDX_NEG_1_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_7]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_1_7:%.*]] = shl i32 
[[TMP15]], 1 -; CHECK-NEXT: [[ADD_1_7]] = add i32 [[MUL_1_7]], [[ADD_1_6]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 1 +; CHECK-NEXT: [[ADD_1_7]] = add i32 [[TMP23]], [[SUM_11_1]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 [[INDVARS_IV_1]], 8 ; CHECK-NEXT: [[EXITCOND_1_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1_7]], 32 ; CHECK-NEXT: br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]] @@ -105,43 +98,43 @@ ; CHECK-NEXT: [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ], [ [[ADD_1_7]], [[FOR_BODY4_1]] ] ; CHECK-NEXT: [[IDX_NEG_2:%.*]] = sub nsw i64 0, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP16]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP24]], 3 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], [[SUM_11_2]] ; CHECK-NEXT: [[IDX_NEG_2_1:%.*]] = xor i64 [[INDVARS_IV_2]], -1 ; CHECK-NEXT: [[ADD_PTR_2_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_1]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_2_1:%.*]] = mul i32 [[TMP17]], 3 +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[MUL_2_1:%.*]] = mul i32 [[TMP25]], 3 ; CHECK-NEXT: [[ADD_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD_2]] ; CHECK-NEXT: [[IDX_NEG_2_2:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_2:%.*]] = getelementptr inbounds i32, ptr 
getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_2]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_2_2:%.*]] = mul i32 [[TMP18]], 3 +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[MUL_2_2:%.*]] = mul i32 [[TMP26]], 3 ; CHECK-NEXT: [[ADD_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD_2_1]] ; CHECK-NEXT: [[IDX_NEG_2_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_3]] -; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_2_3:%.*]] = mul i32 [[TMP19]], 3 +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[MUL_2_3:%.*]] = mul i32 [[TMP27]], 3 ; CHECK-NEXT: [[ADD_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD_2_2]] ; CHECK-NEXT: [[IDX_NEG_2_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_4]] -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_2_4:%.*]] = mul i32 [[TMP20]], 3 +; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[MUL_2_4:%.*]] = mul i32 [[TMP28]], 3 ; CHECK-NEXT: [[ADD_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD_2_3]] ; CHECK-NEXT: [[IDX_NEG_2_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_5]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_2_5:%.*]] = mul i32 [[TMP21]], 3 +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ADD_PTR_2_5]], 
align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[MUL_2_5:%.*]] = mul i32 [[TMP29]], 3 ; CHECK-NEXT: [[ADD_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD_2_4]] ; CHECK-NEXT: [[IDX_NEG_2_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_6]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_2_6:%.*]] = mul i32 [[TMP22]], 3 +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[MUL_2_6:%.*]] = mul i32 [[TMP30]], 3 ; CHECK-NEXT: [[ADD_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD_2_5]] ; CHECK-NEXT: [[IDX_NEG_2_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_7]] -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[MUL_2_7:%.*]] = mul i32 [[TMP23]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[MUL_2_7:%.*]] = mul i32 [[TMP31]], 3 ; CHECK-NEXT: [[ADD_2_7]] = add i32 [[MUL_2_7]], [[ADD_2_6]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2_7]] = add nuw nsw i64 [[INDVARS_IV_2]], 8 ; CHECK-NEXT: [[EXITCOND_2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2_7]], 32