diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1713,6 +1713,54 @@ // UB/poison potential, but that should be refined. BinaryOperator *BO; if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) { + Value *X = BO->getOperand(0); + Value *Y = BO->getOperand(1); + + // Look for an equivalent binop except that one operand has been shuffled. + // If the demand for this binop only includes elements that are the same as + // the other binop, then we may be able to replace this binop with a use of + // the earlier one. + // + // Example: + // %other_bo = bo (shuf X, {0}), Y + // %this_extracted_bo = extelt (bo X, Y), 0 + // --> + // %other_bo = bo (shuf X, {0}), Y + // %this_extracted_bo = extelt %other_bo, 0 + // + // TODO: Handle demand of an arbitrary single element or more than one + // element instead of just element 0. + // TODO: Unlike general demanded elements transforms, this should be safe + // for any (div/rem/shift) opcode too. NOTE(review): the reused binop keeps its own IR flags (the tests in this patch show an `fmul nnan` standing in for an `fmul ninf`, and a flagless `sub` for a `sub nsw`); flags that the demanding binop lacks are presumably unsafe for it - confirm whether they must be intersected before reuse. + if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() && + BO->hasOneUse() ) { + + auto findShufBO = [&](bool MatchShufAsOp0) -> User * { + // Try to use shuffle-of-operand in place of an operand: + // bo X, Y --> bo (shuf X), Y + // bo X, Y --> bo X, (shuf Y) NOTE(review): presumably m_ZeroMask() also accepts undef/poison mask elements; verify that lane 0 of the matched splat really reads element 0. + BinaryOperator::BinaryOps Opcode = BO->getOpcode(); + Value *ShufOp = MatchShufAsOp0 ? X : Y; + Value *OtherOp = MatchShufAsOp0 ? Y : X; + for (User *U : OtherOp->users()) { + auto Shuf = m_Shuffle(m_Specific(ShufOp), m_Value(), m_ZeroMask()); + if (BO->isCommutative() + ? match(U, m_c_BinOp(Opcode, Shuf, m_Specific(OtherOp))) + : MatchShufAsOp0 + ? 
match(U, m_BinOp(Opcode, Shuf, m_Specific(OtherOp))) + : match(U, m_BinOp(Opcode, m_Specific(OtherOp), Shuf))) + if (DT.dominates(U, I)) + return U; + } + return nullptr; + }; + + if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true)) + return ShufBO; + if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false)) + return ShufBO; + } + simplifyAndSetOp(I, 0, DemandedElts, UndefElts); simplifyAndSetOp(I, 1, DemandedElts, UndefElts2); diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -850,8 +850,7 @@ ; CHECK-LABEL: @common_binop_demand_via_splat_op0( ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_XSHUF_Y:%.*]] = mul <2 x i4> [[XSHUF]], [[Y:%.*]] -; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XSHUF_Y]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<2 x i4> [[B_XSHUF_Y]]) ; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]]) ; CHECK-NEXT: ret void @@ -870,8 +869,7 @@ ; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> , [[P:%.*]] ; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_X_YSHUF:%.*]] = mul <2 x i4> [[X]], [[YSHUF]] -; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_X_YSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]]) ; CHECK-NEXT: call void @use(<2 x i4> [[B_X_YSHUF]]) ; CHECK-NEXT: ret void 
@@ -888,12 +886,11 @@ define void @common_binop_demand_via_splat_op0_commute(<2 x i4> %p, <2 x i4> %q) { ; CHECK-LABEL: @common_binop_demand_via_splat_op0_commute( -; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> , [[P:%.*]] +; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> , [[P:%.*]] ; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> , [[Q:%.*]] ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = mul <2 x i4> [[Y]], [[XSHUF]] -; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_Y_XSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]]) ; CHECK-NEXT: call void @use(<2 x i4> [[B_Y_XSHUF]]) ; CHECK-NEXT: ret void @@ -912,11 +909,10 @@ define void @common_binop_demand_via_splat_op1_commute(<2 x i4> %p, <2 x i4> %q) { ; CHECK-LABEL: @common_binop_demand_via_splat_op1_commute( ; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> , [[P:%.*]] -; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> , [[Q:%.*]] +; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> , [[Q:%.*]] ; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = mul <2 x i4> [[YSHUF]], [[X]] -; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_Y_XSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]]) ; CHECK-NEXT: call void @use(<2 x i4> [[B_Y_XSHUF]]) ; CHECK-NEXT: ret void @@ -932,6 +928,8 @@ ret void } +; negative test - wrong operands for sub + define void @common_binop_demand_via_splat_op0_wrong_commute(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: 
@common_binop_demand_via_splat_op0_wrong_commute( ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer @@ -951,6 +949,8 @@ ret void } +; negative test - need to reorder insts? + define void @common_binop_demand_via_splat_op0_not_dominated1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_splat_op0_not_dominated1( ; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]] @@ -970,6 +970,8 @@ ret void } +; negative test - need to reorder insts? + define void @common_binop_demand_via_splat_op0_not_dominated2(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_splat_op0_not_dominated2( ; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]] @@ -993,8 +995,7 @@ ; CHECK-LABEL: @common_binop_demand_via_extelt_op0( ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_XSHUF_Y:%.*]] = sub <2 x i4> [[XSHUF]], [[Y:%.*]] -; CHECK-NEXT: [[B_XY:%.*]] = sub nsw <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XY]], i64 0 +; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XSHUF_Y]], i64 0 ; CHECK-NEXT: call void @use(<2 x i4> [[B_XSHUF_Y]]) ; CHECK-NEXT: ret i4 [[B_XY0]] ; @@ -1011,8 +1012,7 @@ ; CHECK-NEXT: [[X:%.*]] = fsub <2 x float> , [[P:%.*]] ; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_X_YSHUF:%.*]] = fdiv <2 x float> [[X]], [[YSHUF]] -; CHECK-NEXT: [[B_XY:%.*]] = fdiv <2 x float> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_XY]], i64 0 +; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_X_YSHUF]], i64 0 ; CHECK-NEXT: call void @use_fp(<2 x float> [[B_X_YSHUF]]) ; CHECK-NEXT: ret float [[B_XY0]] ; @@ -1027,12 +1027,11 @@ define float @common_binop_demand_via_extelt_op0_commute(<2 x float> %p, <2 x float> %q) { ; CHECK-LABEL: 
@common_binop_demand_via_extelt_op0_commute( -; CHECK-NEXT: [[X:%.*]] = fsub <2 x float> , [[P:%.*]] +; CHECK-NEXT: [[X:%.*]] = fsub <2 x float> , [[P:%.*]] ; CHECK-NEXT: [[Y:%.*]] = fsub <2 x float> , [[Q:%.*]] ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = fmul nnan <2 x float> [[Y]], [[XSHUF]] -; CHECK-NEXT: [[B_XY:%.*]] = fmul ninf <2 x float> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_XY]], i64 0 +; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_Y_XSHUF]], i64 0 ; CHECK-NEXT: call void @use_fp(<2 x float> [[B_Y_XSHUF]]) ; CHECK-NEXT: ret float [[B_XY0]] ; @@ -1049,11 +1048,10 @@ define i4 @common_binop_demand_via_extelt_op1_commute(<2 x i4> %p, <2 x i4> %q) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op1_commute( ; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> , [[P:%.*]] -; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> , [[Q:%.*]] +; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> , [[Q:%.*]] ; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = or <2 x i4> [[YSHUF]], [[X]] -; CHECK-NEXT: [[B_XY:%.*]] = or <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XY]], i64 0 +; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_Y_XSHUF]], i64 0 ; CHECK-NEXT: call void @use(<2 x i4> [[B_Y_XSHUF]]) ; CHECK-NEXT: ret i4 [[B_XY0]] ; @@ -1067,6 +1065,8 @@ ret i4 %b_xy0 } +; negative test - wrong operands for sub + define i4 @common_binop_demand_via_extelt_op0_wrong_commute(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_wrong_commute( ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer @@ -1084,6 +1084,8 @@ ret i4 %b_xy0 } +; negative test - need to reorder insts? 
+ define i4 @common_binop_demand_via_extelt_op0_not_dominated1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_not_dominated1( ; CHECK-NEXT: [[B_XY:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] @@ -1101,6 +1103,8 @@ ret i4 %b_xy0 } +; negative test - need to reorder insts? + define i4 @common_binop_demand_via_extelt_op0_not_dominated2(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_not_dominated2( ; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]] @@ -1118,6 +1122,8 @@ ret i4 %b_xy0 } +; negative test - splat doesn't match demanded element + define i4 @common_binop_demand_via_extelt_op0_mismatch_elt0(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_mismatch_elt0( ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> @@ -1135,6 +1141,8 @@ ret i4 %b_xy0 } +; negative test - splat doesn't match demanded element + define i4 @common_binop_demand_via_extelt_op0_mismatch_elt1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_mismatch_elt1( ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer