Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -678,6 +678,12 @@
   setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
 
+  // [SU][MIN|MAX] are available for all NEON types apart from i64.
+  if (!VT.isFloatingPoint() &&
+      VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
+    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+      setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+
   if (Subtarget->isLittleEndian()) {
     for (unsigned im = (unsigned)ISD::PRE_INC;
          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -2809,6 +2809,55 @@
                                   BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
 defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
 
+def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)),
+          (SMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)),
+          (SMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)),
+          (SMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)),
+          (SMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)),
+          (SMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)),
+          (SMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)),
+          (SMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)),
+          (SMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)),
+          (SMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)),
+          (SMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)),
+          (SMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)),
+          (SMAXv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)),
+          (UMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)),
+          (UMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)),
+          (UMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)),
+          (UMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)),
+          (UMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)),
+          (UMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)),
+          (UMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)),
+          (UMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)),
+          (UMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)),
+          (UMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)),
+          (UMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)),
+          (UMAXv4i32 V128:$Rn, V128:$Rm)>;
+
 def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
           (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
 def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
Index: test/CodeGen/AArch64/minmax.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/minmax.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; CHECK-LABEL: t1
+; CHECK: smax
+define <4 x i32> @t1(<4 x i32> %a, <4 x i32> %b) {
+  %t1 = icmp sgt <4 x i32> %a, %b
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t2
+; CHECK: smin
+define <4 x i32> @t2(<4 x i32> %a, <4 x i32> %b) {
+  %t1 = icmp slt <4 x i32> %a, %b
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t3
+; CHECK: umax
+define <4 x i32> @t3(<4 x i32> %a, <4 x i32> %b) {
+  %t1 = icmp ugt <4 x i32> %a, %b
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t4
+; CHECK: umin
+define <8 x i8> @t4(<8 x i8> %a, <8 x i8> %b) {
+  %t1 = icmp ult <8 x i8> %a, %b
+  %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b
+  ret <8 x i8> %t2
+}
+
+; CHECK-LABEL: t5
+; CHECK: smin
+define <4 x i16> @t5(<4 x i16> %a, <4 x i16> %b) {
+  %t1 = icmp sgt <4 x i16> %b, %a
+  %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b
+  ret <4 x i16> %t2
+}
+
+; CHECK-LABEL: t6
+; CHECK: smax
+define <2 x i32> @t6(<2 x i32> %a, <2 x i32> %b) {
+  %t1 = icmp slt <2 x i32> %b, %a
+  %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b
+  ret <2 x i32> %t2
+}
+
+; CHECK-LABEL: t7
+; CHECK: umin
+define <16 x i8> @t7(<16 x i8> %a, <16 x i8> %b) {
+  %t1 = icmp ugt <16 x i8> %b, %a
+  %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %t2
+}
+
+; CHECK-LABEL: t8
+; CHECK: umax
+define <8 x i16> @t8(<8 x i16> %a, <8 x i16> %b) {
+  %t1 = icmp ult <8 x i16> %b, %a
+  %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %t2
+}
+
+; CHECK-LABEL: t9
+; CHECK: umin
+; CHECK: smax
+define <4 x i32> @t9(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %t1 = icmp ugt <4 x i32> %b, %a
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  %t3 = icmp sge <4 x i32> %t2, %c
+  %t4 = select <4 x i1> %t3, <4 x i32> %t2, <4 x i32> %c
+  ret <4 x i32> %t4
+}
+
+; CHECK-LABEL: t10
+; CHECK: smax
+; CHECK: smax
+define <8 x i32> @t10(<8 x i32> %a, <8 x i32> %b) {
+  %t1 = icmp sgt <8 x i32> %a, %b
+  %t2 = select <8 x i1> %t1, <8 x i32> %a, <8 x i32> %b
+  ret <8 x i32> %t2
+}
+
+; CHECK-LABEL: t11
+; CHECK: smin
+; CHECK: smin
+; CHECK: smin
+; CHECK: smin
define <16 x i32> @t11(<16 x i32> %a, <16 x i32> %b) {
+  %t1 = icmp sle <16 x i32> %a, %b
+  %t2 = select <16 x i1> %t1, <16 x i32> %a, <16 x i32> %b
+  ret <16 x i32> %t2
+}
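
For illustration only, not part of the patch: a minimal C sketch, assuming clang's loop vectorizer forms the icmp+select min/max idiom that the tests above exercise. With the patterns added here, compiling this for AArch64 at -O2 would be expected to use a NEON smax rather than a compare-and-select sequence. The function name, file name, and compiler invocation are illustrative assumptions.

/* Illustrative sketch (assumed example, not from the patch): an element-wise
 * signed max loop; after vectorization it becomes the icmp sgt + select
 * idiom that the new SMAX patterns match.
 * Assumed build command: clang -O2 --target=aarch64-linux-gnu -S smax_loop.c */
#include <stdint.h>

void smax_loop(int32_t *restrict dst, const int32_t *restrict a,
               const int32_t *restrict b, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] = a[i] > b[i] ? a[i] : b[i]; /* per-element signed max */
}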