diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19552,8 +19552,8 @@ auto *IndexC = dyn_cast(Index); if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && V.getOperand(0).getValueType() == SubVT && - (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) { - uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements(); + (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) { + uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements(); return V.getOperand(SubIdx); } return SDValue(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1130,6 +1130,8 @@ setOperationAction(ISD::FADD, VT, Custom); setOperationAction(ISD::FDIV, VT, Custom); setOperationAction(ISD::FMA, VT, Custom); + setOperationAction(ISD::FMAXNUM, VT, Custom); + setOperationAction(ISD::FMINNUM, VT, Custom); setOperationAction(ISD::FMUL, VT, Custom); setOperationAction(ISD::FNEG, VT, Custom); setOperationAction(ISD::FSUB, VT, Custom); diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll --- a/llvm/test/CodeGen/AArch64/sve-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp.ll @@ -592,6 +592,194 @@ ret %res } +; maxnum minnum + +define @maxnum_nxv16f16( %a, %b) { +; CHECK-LABEL: maxnum_nxv16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: ret + %res = call @llvm.maxnum.nxv16f16( %a, %b) + ret %res +} + +define @maxnum_nxv8f16( %a, %b) { +; CHECK-LABEL: maxnum_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = call @llvm.maxnum.nxv8f16( %a, %b) + ret %res +} + +define @maxnum_nxv4f16( %a, %b) { +; CHECK-LABEL: maxnum_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = call @llvm.maxnum.nxv4f16( %a, %b) + ret %res +} + +define @maxnum_nxv2f16( %a, %b) { +; CHECK-LABEL: maxnum_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = call @llvm.maxnum.nxv2f16( %a, %b) + ret %res +} + +define @maxnum_nxv8f32( %a, %b) { +; CHECK-LABEL: maxnum_nxv8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret + %res = call @llvm.maxnum.nxv8f32( %a, %b) + ret %res +} + +define @maxnum_nxv4f32( %a, %b) { +; CHECK-LABEL: maxnum_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %res = call @llvm.maxnum.nxv4f32( %a, %b) + ret %res +} + +define @maxnum_nxv2f32( %a, %b) { +; CHECK-LABEL: maxnum_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %res = call @llvm.maxnum.nxv2f32( %a, %b) + ret %res +} + +define @maxnum_nxv4f64( %a, %b) { +; CHECK-LABEL: maxnum_nxv4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret + %res = call @llvm.maxnum.nxv4f64( %a, %b) + ret %res +} + +define @maxnum_nxv2f64( %a, %b) { +; CHECK-LABEL: maxnum_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %res = call @llvm.maxnum.nxv2f64( %a, %b) + ret %res +} + +define @minnum_nxv16f16( %a, %b) { +; CHECK-LABEL: minnum_nxv16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: ret + %res = call @llvm.minnum.nxv16f16( %a, %b) + ret %res +} + +define @minnum_nxv8f16( %a, %b) { +; CHECK-LABEL: minnum_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = call @llvm.minnum.nxv8f16( %a, %b) + ret %res +} + +define @minnum_nxv4f16( %a, %b) { +; CHECK-LABEL: minnum_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = call @llvm.minnum.nxv4f16( %a, %b) + ret %res +} + +define @minnum_nxv2f16( %a, %b) { +; CHECK-LABEL: minnum_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = call @llvm.minnum.nxv2f16( %a, %b) + ret %res +} + +define @minnum_nxv8f32( %a, %b) { +; CHECK-LABEL: minnum_nxv8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret + %res = call @llvm.minnum.nxv8f32( %a, %b) + ret %res +} + +define @minnum_nxv4f32( %a, %b) { +; CHECK-LABEL: minnum_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %res = call @llvm.minnum.nxv4f32( %a, %b) + ret %res +} + +define @minnum_nxv2f32( %a, %b) { +; CHECK-LABEL: minnum_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %res = call @llvm.minnum.nxv2f32( %a, %b) + ret %res +} + +define @minnum_nxv4f64( %a, %b) { +; CHECK-LABEL: minnum_nxv4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret + %res = call @llvm.minnum.nxv4f64( %a, %b) + ret %res +} + +define @minnum_nxv2f64( %a, %b) { +; CHECK-LABEL: minnum_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %res = call @llvm.minnum.nxv2f64( %a, %b) + ret %res +} + declare @llvm.aarch64.sve.frecps.x.nxv8f16(, ) declare @llvm.aarch64.sve.frecps.x.nxv4f32( , ) declare @llvm.aarch64.sve.frecps.x.nxv2f64(, ) @@ -621,5 +809,24 @@ declare @llvm.fabs.nxv2f32() declare @llvm.fabs.nxv2f64() +declare @llvm.maxnum.nxv16f16(, ) +declare @llvm.maxnum.nxv8f16(, ) +declare @llvm.maxnum.nxv4f16(, ) +declare @llvm.maxnum.nxv2f16(, ) +declare @llvm.maxnum.nxv8f32(, ) +declare @llvm.maxnum.nxv4f32(, ) +declare @llvm.maxnum.nxv2f32(, ) +declare @llvm.maxnum.nxv4f64(, ) +declare @llvm.maxnum.nxv2f64(, ) +declare @llvm.minnum.nxv16f16(, ) +declare @llvm.minnum.nxv8f16(, ) +declare @llvm.minnum.nxv4f16(, ) +declare @llvm.minnum.nxv2f16(, ) +declare @llvm.minnum.nxv8f32(, ) +declare @llvm.minnum.nxv4f32(, ) +declare @llvm.minnum.nxv2f32(, ) +declare @llvm.minnum.nxv4f64(, ) +declare @llvm.minnum.nxv2f64(, ) + ; Function Attrs: nounwind readnone declare double @llvm.aarch64.sve.faddv.nxv2f64(, ) #2