diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7478,10 +7478,26 @@ SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); EVT VT = Op0.getValueType(); + unsigned Opcode = Node->getOpcode(); + SDLoc DL(Node); + + // umin(x,y) -> sub(x,usubsat(x,y)) + if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::USUBSAT, VT)) { + return DAG.getNode(ISD::SUB, DL, VT, Op0, + DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1)); + } + + // umax(x,y) -> add(x,usubsat(y,x)) + if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) && + isOperationLegal(ISD::USUBSAT, VT)) { + return DAG.getNode(ISD::ADD, DL, VT, Op0, + DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0)); + } // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B ISD::CondCode CC; - switch (Node->getOpcode()) { + switch (Opcode) { default: llvm_unreachable("How did we get here?"); case ISD::SMAX: CC = ISD::SETGT; break; case ISD::SMIN: CC = ISD::SETLT; break; @@ -7494,7 +7510,6 @@ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) return DAG.UnrollVectorOp(Node); - SDLoc DL(Node); SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC); return DAG.getSelect(DL, VT, Cond, Op0, Op1); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -217,10 +217,17 @@ TTI::TargetCostKind CostKind) { auto *RetTy = ICA.getReturnType(); switch (ICA.getID()) { - case Intrinsic::smin: case Intrinsic::umin: - case Intrinsic::smax: case Intrinsic::umax: { + auto LT = TLI->getTypeLegalizationCost(DL, RetTy); + // umin(x,y) -> sub(x,usubsat(x,y)) + // umax(x,y) -> add(x,usubsat(y,x)) + if (LT.second == MVT::v2i64) + return LT.first * 2; + LLVM_FALLTHROUGH; + } + case Intrinsic::smin: + case Intrinsic::smax: { static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32, MVT::v4i32}; auto LT = TLI->getTypeLegalizationCost(DL, RetTy); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26959,22 +26959,6 @@ if (VT == MVT::v32i16 || VT == MVT::v64i8) return splitVectorIntBinary(Op, DAG); - SDLoc DL(Op); - unsigned Opcode = Op.getOpcode(); - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - - // For pre-SSE41, we can perform UMIN/UMAX v8i16 by using psubusw. - if (VT == MVT::v8i16) { - assert((Opcode == ISD::UMIN || Opcode == ISD::UMAX) && - "Unexpected MIN/MAX opcode"); - if (Opcode == ISD::UMIN) - return DAG.getNode(ISD::SUB, DL, VT, N0, - DAG.getNode(ISD::USUBSAT, DL, VT, N0, N1)); - return DAG.getNode(ISD::ADD, DL, VT, - DAG.getNode(ISD::USUBSAT, DL, VT, N1, N0), N0); - } - // Default to expand. return SDValue(); } diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll --- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll @@ -115,12 +115,13 @@ } ; COST-LABEL: umin.v2i64 -; COST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %v0, <2 x i64> %v1) +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %v0, <2 x i64> %v1) ; CODE-LABEL: umin.v2i64 ; CODE: bb.0 -; CODE: csel -; CODE: csel +; CODE-NEXT: uqsub v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d +; CODE-NEXT: sub v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d +; CODE-NEXT: ret declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>) define <2 x i64> @umin.v2i64(<2 x i64> %v0, <2 x i64> %v1) { @@ -369,12 +370,13 @@ } ; COST-LABEL: umax.v2i64 -; COST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %v0, <2 x i64> %v1) +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %v0, <2 x i64> %v1) ; CODE-LABEL: umax.v2i64 ; CODE: bb.0 -; CODE: csel -; CODE: csel +; CODE-NEXT: uqsub v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d +; CODE-NEXT: add v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d +; CODE-NEXT: ret declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>) define <2 x i64> @umax.v2i64(<2 x i64> %v0, <2 x i64> %v1) { diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll --- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll @@ -2179,8 +2179,8 @@ ; SSE2-NEXT: psubusw %xmm1, %xmm2 ; SSE2-NEXT: psubusw %xmm0, %xmm1 ; SSE2-NEXT: psubw %xmm0, %xmm2 -; SSE2-NEXT: paddw %xmm0, %xmm2 ; SSE2-NEXT: paddw %xmm1, %xmm2 +; SSE2-NEXT: paddw %xmm0, %xmm2 ; SSE2-NEXT: psrlw $1, %xmm2 ; SSE2-NEXT: pmullw %xmm3, %xmm2 ; SSE2-NEXT: paddw %xmm0, %xmm2