Index: llvm/lib/Analysis/InstructionSimplify.cpp =================================================================== --- llvm/lib/Analysis/InstructionSimplify.cpp +++ llvm/lib/Analysis/InstructionSimplify.cpp @@ -4248,29 +4248,52 @@ Pred = ICmpInst::getInversePredicate(Pred); } - // (X pred Y) ? X : max/min(X, Y) + // A vector select may be shuffling together elements that are equivalent + // based on the max/min/select relationship. Value *X = CmpLHS, *Y = CmpRHS; + bool PeekedThroughSelectShuffle = false; + auto *Shuf = dyn_cast(FVal); + if (Shuf && Shuf->isSelect()) { + if (Shuf->getOperand(0) == Y) + FVal = Shuf->getOperand(1); + else if (Shuf->getOperand(1) == Y) + FVal = Shuf->getOperand(0); + else + return nullptr; + PeekedThroughSelectShuffle = true; + } + + // (X pred Y) ? X : max/min(X, Y) auto *MMI = dyn_cast(FVal); if (!MMI || TVal != X || !match(FVal, m_c_MaxOrMin(m_Specific(X), m_Specific(Y)))) return nullptr; - // (X == Y) ? X : max/min(X, Y) --> max/min(X, Y) - if (Pred == CmpInst::ICMP_EQ) - return MMI; - - // (X != Y) ? X : max/min(X, Y) --> X - if (Pred == CmpInst::ICMP_NE) - return X; - // (X > Y) ? X : max(X, Y) --> max(X, Y) // (X >= Y) ? X : max(X, Y) --> max(X, Y) // (X < Y) ? X : min(X, Y) --> min(X, Y) // (X <= Y) ? X : min(X, Y) --> min(X, Y) + // + // The equivalence allows a vector select (shuffle) of max/min and Y. Ex: + // (X > Y) ? X : (Z ? max(X, Y) : Y) + // If Z is true, this reduces as above, and if Z is false: + // (X > Y) ? X : Y --> max(X, Y) ICmpInst::Predicate MMPred = MMI->getPredicate(); if (MMPred == CmpInst::getStrictPredicate(Pred)) return MMI; + // Other transforms are not valid with a shuffle. + if (PeekedThroughSelectShuffle) + return nullptr; + + // (X == Y) ? X : max/min(X, Y) --> max/min(X, Y) + if (Pred == CmpInst::ICMP_EQ) + return MMI; + + // (X != Y) ? X : max/min(X, Y) --> X + if (Pred == CmpInst::ICMP_NE) + return X; + // (X < Y) ? X : max(X, Y) --> X // (X <= Y) ? X : max(X, Y) --> X // (X > Y) ? X : min(X, Y) --> X Index: llvm/test/Transforms/InstSimplify/select-maxmin.ll =================================================================== --- llvm/test/Transforms/InstSimplify/select-maxmin.ll +++ llvm/test/Transforms/InstSimplify/select-maxmin.ll @@ -1942,13 +1942,12 @@ ret i8 %r } +; select with smin pred + define <4 x i8> @slt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @slt_xy_smin_select_y_shuf_fval( -; CHECK-NEXT: [[I:%.*]] = icmp slt <4 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] -; CHECK-NEXT: ret <4 x i8> [[R]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]) +; CHECK-NEXT: ret <4 x i8> [[M]] ; %i = icmp slt <4 x i8> %x, %y %m = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -1957,6 +1956,8 @@ ret <4 x i8> %r } +; negative test - wrong pred + define <4 x i8> @sgt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @sgt_xy_smin_select_y_shuf_fval( ; CHECK-NEXT: [[I:%.*]] = icmp sgt <4 x i8> [[X:%.*]], [[Y:%.*]] @@ -1972,6 +1973,8 @@ ret <4 x i8> %r } +; negative test - wrong shuffle op + define <4 x i8> @slt_xy_smin_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @slt_xy_smin_select_x_shuf_fval( ; CHECK-NEXT: [[I:%.*]] = icmp slt <4 x i8> [[X:%.*]], [[Y:%.*]] @@ -1987,13 +1990,12 @@ ret <4 x i8> %r } +; select with non-strict smax pred + define <4 x i8> @sge_xy_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @sge_xy_smax_select_y_shuf_fval( -; CHECK-NEXT: [[I:%.*]] = icmp sge <4 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] -; CHECK-NEXT: ret <4 x i8> [[R]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]]) +; CHECK-NEXT: ret <4 x i8> [[M]] ; %i = icmp sge <4 x i8> %x, %y %m = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %y, <4 x i8> %x) @@ -2002,6 +2004,8 @@ ret <4 x i8> %r } +; negative test - wrong (swapped) pred + define <4 x i8> @sle_yx_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @sle_yx_smax_select_y_shuf_fval( ; CHECK-NEXT: [[I:%.*]] = icmp sge <4 x i8> [[Y:%.*]], [[X:%.*]] @@ -2017,6 +2021,8 @@ ret <4 x i8> %r } +; negative test - wrong shuffle op + define <4 x i8> @sge_xy_smax_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @sge_xy_smax_select_x_shuf_fval( ; CHECK-NEXT: [[I:%.*]] = icmp sge <4 x i8> [[X:%.*]], [[Y:%.*]] @@ -2032,13 +2038,12 @@ ret <4 x i8> %r } +; select with non-strict inverted umin pred + define <4 x i8> @uge_xy_umin_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @uge_xy_umin_select_y_shuf_tval( -; CHECK-NEXT: [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[M]], <4 x i8> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[S]], <4 x i8> [[X]] -; CHECK-NEXT: ret <4 x i8> [[R]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]) +; CHECK-NEXT: ret <4 x i8> [[M]] ; %i = icmp uge <4 x i8> %x, %y %m = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -2047,6 +2052,8 @@ ret <4 x i8> %r } +; negative test - wrong pred + define <4 x i8> @uge_xy_umin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @uge_xy_umin_select_y_shuf_fval( ; CHECK-NEXT: [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]] @@ -2062,6 +2069,8 @@ ret <4 x i8> %r } +; negative test - wrong shuffle op + define <4 x i8> @uge_xy_umin_select_x_shuf_tval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @uge_xy_umin_select_x_shuf_tval( ; CHECK-NEXT: [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]] @@ -2077,13 +2086,12 @@ ret <4 x i8> %r } +; select with swapped umax pred + define <4 x i8> @ult_yx_umax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @ult_yx_umax_select_y_shuf_fval( -; CHECK-NEXT: [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] -; CHECK-NEXT: ret <4 x i8> [[R]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]]) +; CHECK-NEXT: ret <4 x i8> [[M]] ; %i = icmp ult <4 x i8> %y, %x %m = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %y, <4 x i8> %x) @@ -2092,6 +2100,8 @@ ret <4 x i8> %r } +; negative test - wrong (inverted+swapped) pred + define <4 x i8> @ult_yx_umax_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @ult_yx_umax_select_y_shuf_tval( ; CHECK-NEXT: [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]] @@ -2107,6 +2117,8 @@ ret <4 x i8> %r } +; negative test - wrong shuffle mask + define <4 x i8> @ult_yx_umax_select_y_shuf_mask_fval(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @ult_yx_umax_select_y_shuf_mask_fval( ; CHECK-NEXT: [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]] Index: llvm/test/Transforms/PhaseOrdering/vector-select.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/vector-select.ll +++ llvm/test/Transforms/PhaseOrdering/vector-select.ll @@ -93,16 +93,8 @@ define <4 x i32> @PR42100(<4 x i32> noundef %x, <4 x i32> noundef %min) { ; CHECK-LABEL: @PR42100( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[MIN:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X]], <4 x i32> [[MIN]]) -; CHECK-NEXT: [[MIN_ADDR_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[MIN]], <4 x i32> -; CHECK-NEXT: [[SEL3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1]] -; CHECK-NEXT: [[MIN_ADDR_1_1:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1]], <4 x i32> [[SEL3]], <4 x i32> -; CHECK-NEXT: [[SEL4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1_1]] -; CHECK-NEXT: [[MIN_ADDR_1_2:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1_1]], <4 x i32> [[SEL4]], <4 x i32> -; CHECK-NEXT: [[SEL5:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1_2]] -; CHECK-NEXT: [[MIN_ADDR_1_3:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1_2]], <4 x i32> [[SEL5]], <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[MIN_ADDR_1_3]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> [[MIN:%.*]]) +; CHECK-NEXT: ret <4 x i32> [[TMP0]] ; entry: br label %for.cond