Index: llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1744,6 +1744,38 @@
   return nullptr;
 }
 
+/// Try to reassociate a pair of binops so that values with one use only are
+/// part of the same instruction. This may enable folds that are limited with
+/// multi-use restrictions and makes it more likely to match other patterns that
+/// are looking for a common operand.
+///
+/// \param BO the outer binop; the inner binop must use the same opcode.
+/// \param Builder used to create the new inner binop (Y op Z).
+/// \returns the replacement outer binop ((Y op Z) op X), or nullptr when the
+/// pattern does not match.
+static Instruction *reassociateForUses(BinaryOperator &BO,
+                                       InstCombinerImpl::BuilderTy &Builder) {
+  Instruction::BinaryOps Opcode = BO.getOpcode();
+  Value *X, *Y, *Z;
+  // Match "(X op Y) op Z" in any operand order, where the inner binop, Y, and
+  // Z are each wrapped in a one-use matcher.
+  if (match(&BO, m_c_BinOp(Opcode,
+                           m_OneUse(m_c_BinOp(Opcode, m_Value(X),
+                                              m_OneUse(m_Value(Y)))),
+                           m_OneUse(m_Value(Z))))) {
+    // (X op Y) op Z --> (Y op Z) op X
+    // Only regroup when none of the operands is a constant and X does not
+    // have exactly one use.
+    if (!isa<Constant>(X) && !isa<Constant>(Y) && !isa<Constant>(Z) &&
+        !X->hasOneUse()) {
+      Value *YZ = Builder.CreateBinOp(Opcode, Y, Z);
+      return BinaryOperator::Create(Opcode, YZ, X);
+    }
+  }
+
+  return nullptr;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -2204,6 +2227,9 @@ if (matchSimpleRecurrence(&I, PN, Start, Step) && DT.dominates(Step, PN)) return replaceInstUsesWith(I, Builder.CreateAnd(Start, Step)); + if (Instruction *R = reassociateForUses(I, Builder)) + return R; + return nullptr; } @@ -3143,6 +3169,9 @@ Builder.CreateOr(C, Builder.CreateAnd(A, B)), D); } + if (Instruction *R = reassociateForUses(I, Builder)) + return R; + return nullptr; } @@ -3880,5 +3909,8 @@ m_Value(Y)))) return BinaryOperator::CreateXor(Builder.CreateXor(X, Y), C1); + if (Instruction *R = reassociateForUses(I, Builder)) + return R; + return nullptr; } Index: llvm/test/Transforms/InstCombine/and-or-icmps.ll =================================================================== --- llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -373,8 +373,8 @@ ; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0 ; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C7]], [[C10]] ; CHECK-NEXT: [[C6:%.*]] = xor i1 [[B15]], true -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C10]], [[C5]] -; CHECK-NEXT: [[C3:%.*]] = and i1 [[TMP2]], [[C6]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C5]], [[C6]] +; CHECK-NEXT: [[C3:%.*]] = and i1 [[TMP2]], [[C10]] ; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[C10]], true ; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[C3]] to i64 Index: llvm/test/Transforms/InstCombine/and-or-not.ll =================================================================== --- llvm/test/Transforms/InstCombine/and-or-not.ll +++ llvm/test/Transforms/InstCombine/and-or-not.ll @@ -645,11 +645,7 @@ define i4 @simplify_or_common_op_commute0(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @simplify_or_common_op_commute0( -; CHECK-NEXT: [[XY:%.*]] = and i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = and i4 [[XY]], [[Z:%.*]] -; CHECK-NEXT: [[NOT_XYZ:%.*]] = xor i4 [[XYZ]], -1 -; CHECK-NEXT: [[R:%.*]] = or i4 [[NOT_XYZ]], [[X]] -; CHECK-NEXT: ret i4 [[R]] +; CHECK-NEXT: ret i4 -1 ; %xy = and i4 %x, 
%y %xyz = and i4 %xy, %z @@ -660,11 +656,7 @@ define i4 @simplify_or_common_op_commute1(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @simplify_or_common_op_commute1( -; CHECK-NEXT: [[XY:%.*]] = and i4 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = and i4 [[XY]], [[Z:%.*]] -; CHECK-NEXT: [[NOT_XYZ:%.*]] = xor i4 [[XYZ]], -1 -; CHECK-NEXT: [[R:%.*]] = or i4 [[NOT_XYZ]], [[X]] -; CHECK-NEXT: ret i4 [[R]] +; CHECK-NEXT: ret i4 -1 ; %xy = and i4 %y, %x %xyz = and i4 %xy, %z @@ -673,15 +665,11 @@ ret i4 %r } +; The common operand may bubble through multiple instructions. + define i4 @simplify_or_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q) { ; CHECK-LABEL: @simplify_or_common_op_commute2( -; CHECK-NEXT: [[Z:%.*]] = mul i4 [[P:%.*]], [[P]] -; CHECK-NEXT: [[XY:%.*]] = and i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = and i4 [[Z]], [[XY]] -; CHECK-NEXT: [[XYZQ:%.*]] = and i4 [[XYZ]], [[Q:%.*]] -; CHECK-NEXT: [[NOT_XYZQ:%.*]] = xor i4 [[XYZQ]], -1 -; CHECK-NEXT: [[R:%.*]] = or i4 [[NOT_XYZQ]], [[X]] -; CHECK-NEXT: ret i4 [[R]] +; CHECK-NEXT: ret i4 -1 ; %z = mul i4 %p, %p ; thwart complexity-based canonicalization %xy = and i4 %x, %y @@ -694,12 +682,7 @@ define <2 x i4> @simplify_or_common_op_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %p) { ; CHECK-LABEL: @simplify_or_common_op_commute3( -; CHECK-NEXT: [[Z:%.*]] = mul <2 x i4> [[P:%.*]], [[P]] -; CHECK-NEXT: [[XY:%.*]] = and <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = and <2 x i4> [[Z]], [[XY]] -; CHECK-NEXT: [[NOT_XYZ:%.*]] = xor <2 x i4> [[XYZ]], <i4 -1, i4 -1> -; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[NOT_XYZ]], [[X]] -; CHECK-NEXT: ret <2 x i4> [[R]] +; CHECK-NEXT: ret <2 x i4> <i4 -1, i4 -1> ; %z = mul <2 x i4> %p, %p ; thwart complexity-based canonicalization %xy = and <2 x i4> %y, %x @@ -711,12 +694,8 @@ define i4 @simplify_and_common_op_commute0(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @simplify_and_common_op_commute0( -; CHECK-NEXT: [[XY:%.*]] = or i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: call void @use(i4 [[X]]) -; CHECK-NEXT: 
[[XYZ:%.*]] = or i4 [[XY]], [[Z:%.*]] -; CHECK-NEXT: [[NOT_XYZ:%.*]] = xor i4 [[XYZ]], -1 -; CHECK-NEXT: [[R:%.*]] = and i4 [[NOT_XYZ]], [[X]] -; CHECK-NEXT: ret i4 [[R]] +; CHECK-NEXT: call void @use(i4 [[X:%.*]]) +; CHECK-NEXT: ret i4 0 ; %xy = or i4 %x, %y call void @use(i4 %x) @@ -728,11 +707,7 @@ define i4 @simplify_and_common_op_commute1(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @simplify_and_common_op_commute1( -; CHECK-NEXT: [[XY:%.*]] = or i4 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = or i4 [[XY]], [[Z:%.*]] -; CHECK-NEXT: [[NOT_XYZ:%.*]] = xor i4 [[XYZ]], -1 -; CHECK-NEXT: [[R:%.*]] = and i4 [[NOT_XYZ]], [[X]] -; CHECK-NEXT: ret i4 [[R]] +; CHECK-NEXT: ret i4 0 ; %xy = or i4 %y, %x %xyz = or i4 %xy, %z @@ -741,15 +716,11 @@ ret i4 %r } +; The common operand may bubble through multiple instructions. + define i4 @simplify_and_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q) { ; CHECK-LABEL: @simplify_and_common_op_commute2( -; CHECK-NEXT: [[Z:%.*]] = mul i4 [[P:%.*]], [[P]] -; CHECK-NEXT: [[XY:%.*]] = or i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = or i4 [[Z]], [[XY]] -; CHECK-NEXT: [[XYZQ:%.*]] = or i4 [[XYZ]], [[Q:%.*]] -; CHECK-NEXT: [[NOT_XYZQ:%.*]] = xor i4 [[XYZQ]], -1 -; CHECK-NEXT: [[R:%.*]] = and i4 [[NOT_XYZQ]], [[X]] -; CHECK-NEXT: ret i4 [[R]] +; CHECK-NEXT: ret i4 0 ; %z = mul i4 %p, %p ; thwart complexity-based canonicalization %xy = or i4 %x, %y @@ -762,12 +733,7 @@ define <2 x i4> @simplify_and_common_op_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %p) { ; CHECK-LABEL: @simplify_and_common_op_commute3( -; CHECK-NEXT: [[Z:%.*]] = mul <2 x i4> [[P:%.*]], [[P]] -; CHECK-NEXT: [[XY:%.*]] = or <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = or <2 x i4> [[Z]], [[XY]] -; CHECK-NEXT: [[NOT_XYZ:%.*]] = xor <2 x i4> [[XYZ]], <i4 -1, i4 -1> -; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[NOT_XYZ]], [[X]] -; CHECK-NEXT: ret <2 x i4> [[R]] +; CHECK-NEXT: ret <2 x i4> zeroinitializer ; %z = mul <2 x i4> %p, %p ; thwart complexity-based canonicalization %xy 
= or <2 x i4> %y, %x @@ -777,6 +743,8 @@ ret <2 x i4> %r } +; TODO: This should simplify. + define i4 @simplify_and_common_op_use1(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @simplify_and_common_op_use1( ; CHECK-NEXT: [[XY:%.*]] = or i4 [[X:%.*]], [[Y:%.*]] @@ -794,6 +762,8 @@ ret i4 %r } +; TODO: This should simplify. + define i4 @simplify_and_common_op_use2(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @simplify_and_common_op_use2( ; CHECK-NEXT: [[XY:%.*]] = or i4 [[X:%.*]], [[Y:%.*]] @@ -813,9 +783,8 @@ define i4 @reduce_xor_common_op_commute0(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @reduce_xor_common_op_commute0( -; CHECK-NEXT: [[XY:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = xor i4 [[XY]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = or i4 [[XYZ]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret i4 [[R]] ; %xy = xor i4 %x, %y @@ -826,9 +795,8 @@ define i4 @reduce_xor_common_op_commute1(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @reduce_xor_common_op_commute1( -; CHECK-NEXT: [[XY:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = xor i4 [[XY]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = or i4 [[XYZ]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret i4 [[R]] ; %xy = xor i4 %y, %x @@ -840,11 +808,9 @@ define i4 @annihilate_xor_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q) { ; CHECK-LABEL: @annihilate_xor_common_op_commute2( ; CHECK-NEXT: [[Z:%.*]] = mul i4 [[P:%.*]], [[P]] -; CHECK-NEXT: [[XY:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = xor i4 [[Z]], [[XY]] -; CHECK-NEXT: [[XYZQ:%.*]] = xor i4 [[XYZ]], [[Q:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[XYZQ]], [[X]] -; CHECK-NEXT: ret i4 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[Z]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i4 [[TMP1]], [[Q:%.*]] +; CHECK-NEXT: ret i4 [[TMP2]] ; %z = mul i4 %p, %p ; thwart 
complexity-based canonicalization %xy = xor i4 %x, %y @@ -857,9 +823,8 @@ define <2 x i4> @reduce_xor_common_op_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %p) { ; CHECK-LABEL: @reduce_xor_common_op_commute3( ; CHECK-NEXT: [[Z:%.*]] = mul <2 x i4> [[P:%.*]], [[P]] -; CHECK-NEXT: [[XY:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[XYZ:%.*]] = xor <2 x i4> [[Z]], [[XY]] -; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[XYZ]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[Z]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %z = mul <2 x i4> %p, %p ; thwart complexity-based canonicalization Index: llvm/test/Transforms/InstCombine/and-or.ll =================================================================== --- llvm/test/Transforms/InstCombine/and-or.ll +++ llvm/test/Transforms/InstCombine/and-or.ll @@ -672,10 +672,10 @@ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHR]], 157 ; CHECK-NEXT: call void @use2(i32 [[AND]]) ; CHECK-NEXT: [[AND3:%.*]] = and i32 [[SHR]], [[B:%.*]] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND3]], [[AND]] ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[B]], 23 ; CHECK-NEXT: [[AND9:%.*]] = and i32 [[TMP1]], 157 -; CHECK-NEXT: [[R:%.*]] = or i32 [[OR]], [[AND9]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[AND3]], [[AND9]] +; CHECK-NEXT: [[R:%.*]] = or i32 [[TMP2]], [[AND]] ; CHECK-NEXT: ret i32 [[R]] ; %shr = ashr i32 %a, 23 @@ -701,8 +701,8 @@ ; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true ; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP0]], [[A_1]] ; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP2]], [[B_1]] -; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[D:%.*]] = or i1 [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[D:%.*]] = or i1 [[TMP7]], [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { i1, i1, i1, i1, i1 } zeroinitializer, i1 [[D]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertvalue { i1, i1, 
i1, i1, i1 } [[TMP9]], i1 [[TMP4]], 1 Index: llvm/test/Transforms/InstCombine/and-xor-or.ll =================================================================== --- llvm/test/Transforms/InstCombine/and-xor-or.ll +++ llvm/test/Transforms/InstCombine/and-xor-or.ll @@ -3637,8 +3637,8 @@ ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[TMP1]], [[B]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] ; CHECK-NEXT: call void @use(i32 [[OR1]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3659,8 +3659,8 @@ ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[TMP1]], [[B]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] ; CHECK-NEXT: call void @use(i32 [[NOT1]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3702,8 +3702,8 @@ ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[TMP1]], [[B]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] ; CHECK-NEXT: call void @use(i32 [[AND2]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3916,11 +3916,10 @@ ; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use5 ; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] -; CHECK-NEXT: 
[[NOT1:%.*]] = xor i32 [[AND1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[OR2]], [[NOT1]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[TMP1]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = xor i32 [[AND1]], [[OR2]] ; CHECK-NEXT: call void @use(i32 [[AND1]]) ; CHECK-NEXT: ret i32 [[AND2]] ; @@ -3940,9 +3939,9 @@ ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[OR2]], [[NOT1]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[TMP1]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = xor i32 [[AND1]], [[OR2]] ; CHECK-NEXT: call void @use(i32 [[NOT1]]) ; CHECK-NEXT: ret i32 [[AND2]] ; @@ -3981,11 +3980,10 @@ ; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use8 ; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] -; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[OR2]], [[NOT1]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[TMP1]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = xor i32 [[AND1]], [[OR2]] ; CHECK-NEXT: call void @use(i32 [[OR2]]) ; CHECK-NEXT: ret i32 [[AND2]] ; Index: llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll +++ 
llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -180,9 +180,9 @@ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[TMP4]], [[WIDE_MASKED_LOAD1]] -; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP5]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> +; CHECK-NEXT: [[TMP6]] = and <4 x i32> [[VEC_PHI]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] Index: llvm/test/Transforms/LoopVectorize/reduction-predselect.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/reduction-predselect.ll +++ llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -366,9 +366,9 @@ ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP40:%.*]] = and <4 x i32> [[VEC_PHI]], [[TMP38]] -; CHECK-NEXT: [[TMP41:%.*]] = and <4 x i32> [[TMP40]], [[TMP39]] -; CHECK-NEXT: [[TMP42]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP41]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP40:%.*]] = and <4 x i32> [[TMP38]], [[TMP39]] +; CHECK-NEXT: 
[[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> +; CHECK-NEXT: [[TMP42]] = and <4 x i32> [[VEC_PHI]], [[TMP41]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260