Index: llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1713,6 +1713,49 @@ // UB/poison potential, but that should be refined. BinaryOperator *BO; if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) { + Value *X = BO->getOperand(0); + Value *Y = BO->getOperand(1); + + // Look for an equivalent binop except that one operand has been shuffled. + // If the demand for this binop only includes elements that are the same as + // the other binop, then we may be able to replace this binop with a use of + // the earlier one. + // + // Example: + // %other_bo = bo (shuf X, {0}), Y + // %this_extracted_bo = extelt (bo X, Y), 0 + // --> + // %other_bo = bo (shuf X, {0}), Y + // %this_extracted_bo = extelt %other_bo, 0 + // + // TODO: Handle demand of an arbitrary single element or more than one + // element instead of just element 0. + if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() && + BO->hasOneUse() ) { + BinaryOperator::BinaryOps Opcode = BO->getOpcode(); + bool Commute = BO->isCommutative(); + + // Try to use shuffle-of-Y in place of Y: + // bo X, Y --> bo X, (shuf Y) + for (User *U : X->users()) { + auto Shuf = m_Shuffle(m_Specific(Y), m_Value(), m_ZeroMask()); + if (match(U, m_BinOp(Opcode, m_Specific(X), Shuf)) || + (Commute && match(U, m_BinOp(Opcode, Shuf, m_Specific(X))))) + if (DT.dominates(U, I)) + return U; + } + + // Try to use shuffle-of-X in place of X: + // bo X, Y --> bo (shuf X), Y + for (User *U : Y->users()) { + auto Shuf = m_Shuffle(m_Specific(X), m_Value(), m_ZeroMask()); + if (match(U, m_BinOp(Opcode, Shuf, m_Specific(Y))) || + (Commute && match(U, m_BinOp(Opcode, m_Specific(Y), Shuf)))) + if (DT.dominates(U, I)) + return U; + } + } + simplifyAndSetOp(I, 0, DemandedElts, UndefElts); simplifyAndSetOp(I, 1, DemandedElts, UndefElts2); Index: llvm/test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -850,8 +850,7 @@ ; CHECK-LABEL: @common_binop_demand_via_splat_op0( ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_XSHUF_Y:%.*]] = mul <2 x i4> [[XSHUF]], [[Y:%.*]] -; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XSHUF_Y]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<2 x i4> [[B_XSHUF_Y]]) ; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]]) ; CHECK-NEXT: ret void @@ -870,8 +869,7 @@ ; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> , [[P:%.*]] ; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_X_YSHUF:%.*]] = mul <2 x i4> [[X]], [[YSHUF]] -; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_X_YSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]]) ; CHECK-NEXT: call void @use(<2 x i4> [[B_X_YSHUF]]) ; CHECK-NEXT: ret void @@ -888,12 +886,11 @@ define void @common_binop_demand_via_splat_op0_commute(<2 x i4> %p, <2 x i4> %q) { ; CHECK-LABEL: @common_binop_demand_via_splat_op0_commute( -; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> , [[P:%.*]] +; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> , [[P:%.*]] ; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> , [[Q:%.*]] ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = mul <2 x i4> [[Y]], [[XSHUF]] -; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_Y_XSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]]) ; CHECK-NEXT: call void @use(<2 x i4> [[B_Y_XSHUF]]) ; CHECK-NEXT: ret void @@ -912,11 +909,10 @@ define void @common_binop_demand_via_splat_op1_commute(<2 x i4> %p, <2 x i4> %q) { ; CHECK-LABEL: @common_binop_demand_via_splat_op1_commute( ; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> , [[P:%.*]] -; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> , [[Q:%.*]] +; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> , [[Q:%.*]] ; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = mul <2 x i4> [[YSHUF]], [[X]] -; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_Y_XSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]]) ; CHECK-NEXT: call void @use(<2 x i4> [[B_Y_XSHUF]]) ; CHECK-NEXT: ret void @@ -932,6 +928,8 @@ ret void } +; negative test - wrong operands for sub + define void @common_binop_demand_via_splat_op0_wrong_commute(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_splat_op0_wrong_commute( ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer @@ -951,6 +949,8 @@ ret void } +; negative test - need to reorder insts? + define void @common_binop_demand_via_splat_op0_not_dominated1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_splat_op0_not_dominated1( ; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]] @@ -970,6 +970,8 @@ ret void } +; negative test - need to reorder insts? + define void @common_binop_demand_via_splat_op0_not_dominated2(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_splat_op0_not_dominated2( ; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]] @@ -993,8 +995,7 @@ ; CHECK-LABEL: @common_binop_demand_via_extelt_op0( ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_XSHUF_Y:%.*]] = sub <2 x i4> [[XSHUF]], [[Y:%.*]] -; CHECK-NEXT: [[B_XY:%.*]] = sub nsw <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XY]], i64 0 +; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XSHUF_Y]], i64 0 ; CHECK-NEXT: call void @use(<2 x i4> [[B_XSHUF_Y]]) ; CHECK-NEXT: ret i4 [[B_XY0]] ; @@ -1011,8 +1012,7 @@ ; CHECK-NEXT: [[X:%.*]] = fsub <2 x float> , [[P:%.*]] ; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_X_YSHUF:%.*]] = fdiv <2 x float> [[X]], [[YSHUF]] -; CHECK-NEXT: [[B_XY:%.*]] = fdiv <2 x float> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_XY]], i64 0 +; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_X_YSHUF]], i64 0 ; CHECK-NEXT: call void @use_fp(<2 x float> [[B_X_YSHUF]]) ; CHECK-NEXT: ret float [[B_XY0]] ; @@ -1027,12 +1027,11 @@ define float @common_binop_demand_via_extelt_op0_commute(<2 x float> %p, <2 x float> %q) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_commute( -; CHECK-NEXT: [[X:%.*]] = fsub <2 x float> , [[P:%.*]] +; CHECK-NEXT: [[X:%.*]] = fsub <2 x float> , [[P:%.*]] ; CHECK-NEXT: [[Y:%.*]] = fsub <2 x float> , [[Q:%.*]] ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = fmul nnan <2 x float> [[Y]], [[XSHUF]] -; CHECK-NEXT: [[B_XY:%.*]] = fmul ninf <2 x float> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_XY]], i64 0 +; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_Y_XSHUF]], i64 0 ; CHECK-NEXT: call void @use_fp(<2 x float> [[B_Y_XSHUF]]) ; CHECK-NEXT: ret float [[B_XY0]] ; @@ -1049,11 +1048,10 @@ define i4 @common_binop_demand_via_extelt_op1_commute(<2 x i4> %p, <2 x i4> %q) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op1_commute( ; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> , [[P:%.*]] -; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> , [[Q:%.*]] +; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> , [[Q:%.*]] ; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y]], <2 x i4> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = or <2 x i4> [[YSHUF]], [[X]] -; CHECK-NEXT: [[B_XY:%.*]] = or <2 x i4> [[X]], [[Y]] -; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XY]], i64 0 +; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_Y_XSHUF]], i64 0 ; CHECK-NEXT: call void @use(<2 x i4> [[B_Y_XSHUF]]) ; CHECK-NEXT: ret i4 [[B_XY0]] ; @@ -1067,6 +1065,8 @@ ret i4 %b_xy0 } +; negative test - wrong operands for sub + define i4 @common_binop_demand_via_extelt_op0_wrong_commute(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_wrong_commute( ; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer @@ -1084,6 +1084,8 @@ ret i4 %b_xy0 } +; negative test - need to reorder insts? + define i4 @common_binop_demand_via_extelt_op0_not_dominated1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_not_dominated1( ; CHECK-NEXT: [[B_XY:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] @@ -1101,6 +1103,8 @@ ret i4 %b_xy0 } +; negative test - need to reorder insts? + define i4 @common_binop_demand_via_extelt_op0_not_dominated2(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_not_dominated2( ; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]]