diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -959,6 +959,7 @@ SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1034,6 +1034,12 @@ setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom); setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom); setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom); + for (auto VT : {MVT::v1i64, MVT::v2i64}) { + setOperationAction(ISD::UMAX, VT, Custom); + setOperationAction(ISD::SMAX, VT, Custom); + setOperationAction(ISD::UMIN, VT, Custom); + setOperationAction(ISD::SMIN, VT, Custom); + } // AArch64 doesn't have MUL.2d: setOperationAction(ISD::MUL, MVT::v2i64, Expand); @@ -4777,17 +4783,10 @@ case ISD::UDIV: return LowerDIV(Op, DAG); case ISD::SMIN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED, - /*OverrideNEON=*/true); case ISD::UMIN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED, - /*OverrideNEON=*/true); case ISD::SMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED, - /*OverrideNEON=*/true); case ISD::UMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED, - /*OverrideNEON=*/true); + return LowerMinMax(Op, DAG); case ISD::SRA: case ISD::SRL: case ISD::SHL: @@ -7083,6 +7082,56 @@ return DAG.getNode(ISD::CTLZ, DL, VT, RBIT); } 
+SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  // Lower integer min/max: *_PRED node when SVE is used, else SETCC + select.
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  unsigned Opcode = Op.getOpcode();
+  ISD::CondCode CC; // Predicate under which operand 0 is the result.
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Wrong instruction");
+  case ISD::SMAX:
+    CC = ISD::SETGT;
+    break;
+  case ISD::SMIN:
+    CC = ISD::SETLT;
+    break;
+  case ISD::UMAX:
+    CC = ISD::SETUGT;
+    break;
+  case ISD::UMIN:
+    CC = ISD::SETULT;
+    break;
+  }
+  // Scalable VTs, or fixed VTs that should use SVE, map to the *_PRED nodes.
+  if (VT.isScalableVector() ||
+      useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
+    switch (Opcode) {
+    default:
+      llvm_unreachable("Wrong instruction");
+    case ISD::SMAX:
+      return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
+                                 /*OverrideNEON=*/true);
+    case ISD::SMIN:
+      return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
+                                 /*OverrideNEON=*/true);
+    case ISD::UMAX:
+      return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
+                                 /*OverrideNEON=*/true);
+    case ISD::UMIN:
+      return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
+                                 /*OverrideNEON=*/true);
+    }
+  }
+  // Otherwise: pick Op0 where (Op0 CC Op1) holds, and Op1 elsewhere.
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
+  return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+}
+
 SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
                                                SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -233,6 +233,9 @@
     static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
                                         MVT::v8i16, MVT::v2i32, MVT::v4i32};
     auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    // v2i64 is custom-lowered to a compare + select pair, hence the cost of 2.
+    if (LT.second == MVT::v2i64)
+      return LT.first * 2;
     if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
       return LT.first;
     break;
diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll 
b/llvm/test/Analysis/CostModel/AArch64/min-max.ll --- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll @@ -96,8 +96,8 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef) @@ -135,8 +135,8 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 82 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef) diff --git a/llvm/test/CodeGen/AArch64/min-max.ll b/llvm/test/CodeGen/AArch64/min-max.ll --- a/llvm/test/CodeGen/AArch64/min-max.ll +++ b/llvm/test/CodeGen/AArch64/min-max.ll @@ -185,13 +185,8 @@ define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-ISEL-LABEL: smax1i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-ISEL-NEXT: fmov x8, d1 -; CHECK-ISEL-NEXT: fmov x9, d0 -; CHECK-ISEL-NEXT: cmp x9, x8 -; CHECK-ISEL-NEXT: csel x8, x9, x8, gt -; CHECK-ISEL-NEXT: fmov d0, x8 +; CHECK-ISEL-NEXT: cmgt d2, d0, d1 +; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-ISEL-NEXT: ret ; ; CHECK-GLOBAL-LABEL: smax1i64: @@ -210,16 +205,8 @@ define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-ISEL-LABEL: smax2i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: mov x8, v1.d[1] -; CHECK-ISEL-NEXT: mov x9, v0.d[1] -; CHECK-ISEL-NEXT: fmov x10, d1 -; CHECK-ISEL-NEXT: fmov x11, d0 -; CHECK-ISEL-NEXT: cmp x9, x8 -; CHECK-ISEL-NEXT: csel x8, x9, x8, gt -; CHECK-ISEL-NEXT: cmp x11, x10 -; CHECK-ISEL-NEXT: csel x9, x11, x10, gt -; CHECK-ISEL-NEXT: fmov d0, x9 -; CHECK-ISEL-NEXT: mov v0.d[1], x8 +; CHECK-ISEL-NEXT: cmgt v2.2d, v0.2d, v1.2d +; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-ISEL-NEXT: ret ; ; CHECK-GLOBAL-LABEL: smax2i64: @@ -238,26 +225,10 @@ define void @smax4i64(<4 x i64> %a, <4 x i64> %b, 
<4 x i64>* %p) { ; CHECK-ISEL-LABEL: smax4i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: mov x8, v2.d[1] -; CHECK-ISEL-NEXT: mov x9, v0.d[1] -; CHECK-ISEL-NEXT: fmov x10, d2 -; CHECK-ISEL-NEXT: fmov x11, d0 -; CHECK-ISEL-NEXT: cmp x9, x8 -; CHECK-ISEL-NEXT: csel x8, x9, x8, gt -; CHECK-ISEL-NEXT: cmp x11, x10 -; CHECK-ISEL-NEXT: mov x9, v3.d[1] -; CHECK-ISEL-NEXT: csel x10, x11, x10, gt -; CHECK-ISEL-NEXT: mov x11, v1.d[1] -; CHECK-ISEL-NEXT: cmp x11, x9 -; CHECK-ISEL-NEXT: fmov d0, x10 -; CHECK-ISEL-NEXT: fmov x10, d3 -; CHECK-ISEL-NEXT: csel x9, x11, x9, gt -; CHECK-ISEL-NEXT: fmov x11, d1 -; CHECK-ISEL-NEXT: cmp x11, x10 -; CHECK-ISEL-NEXT: csel x10, x11, x10, gt -; CHECK-ISEL-NEXT: fmov d1, x10 -; CHECK-ISEL-NEXT: mov v0.d[1], x8 -; CHECK-ISEL-NEXT: mov v1.d[1], x9 +; CHECK-ISEL-NEXT: cmgt v4.2d, v0.2d, v2.2d +; CHECK-ISEL-NEXT: cmgt v5.2d, v1.2d, v3.2d +; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b ; CHECK-ISEL-NEXT: stp q0, q1, [x0] ; CHECK-ISEL-NEXT: ret ; @@ -457,13 +428,8 @@ define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-ISEL-LABEL: umax1i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-ISEL-NEXT: fmov x8, d1 -; CHECK-ISEL-NEXT: fmov x9, d0 -; CHECK-ISEL-NEXT: cmp x9, x8 -; CHECK-ISEL-NEXT: csel x8, x9, x8, hi -; CHECK-ISEL-NEXT: fmov d0, x8 +; CHECK-ISEL-NEXT: cmhi d2, d0, d1 +; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-ISEL-NEXT: ret ; ; CHECK-GLOBAL-LABEL: umax1i64: @@ -482,8 +448,8 @@ define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-ISEL-LABEL: umax2i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: uqsub v1.2d, v1.2d, v0.2d -; CHECK-ISEL-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-ISEL-NEXT: cmhi v2.2d, v0.2d, v1.2d +; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-ISEL-NEXT: ret ; ; CHECK-GLOBAL-LABEL: umax2i64: @@ -502,10 +468,10 @@ define void @umax4i64(<4 x i64> 
%a, <4 x i64> %b, <4 x i64>* %p) { ; CHECK-ISEL-LABEL: umax4i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: uqsub v2.2d, v2.2d, v0.2d -; CHECK-ISEL-NEXT: uqsub v3.2d, v3.2d, v1.2d -; CHECK-ISEL-NEXT: add v0.2d, v0.2d, v2.2d -; CHECK-ISEL-NEXT: add v1.2d, v1.2d, v3.2d +; CHECK-ISEL-NEXT: cmhi v4.2d, v0.2d, v2.2d +; CHECK-ISEL-NEXT: cmhi v5.2d, v1.2d, v3.2d +; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b ; CHECK-ISEL-NEXT: stp q0, q1, [x0] ; CHECK-ISEL-NEXT: ret ; @@ -705,13 +671,8 @@ define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-ISEL-LABEL: smin1i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-ISEL-NEXT: fmov x8, d1 -; CHECK-ISEL-NEXT: fmov x9, d0 -; CHECK-ISEL-NEXT: cmp x9, x8 -; CHECK-ISEL-NEXT: csel x8, x9, x8, lt -; CHECK-ISEL-NEXT: fmov d0, x8 +; CHECK-ISEL-NEXT: cmgt d2, d1, d0 +; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-ISEL-NEXT: ret ; ; CHECK-GLOBAL-LABEL: smin1i64: @@ -730,16 +691,8 @@ define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-ISEL-LABEL: smin2i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: mov x8, v1.d[1] -; CHECK-ISEL-NEXT: mov x9, v0.d[1] -; CHECK-ISEL-NEXT: fmov x10, d1 -; CHECK-ISEL-NEXT: fmov x11, d0 -; CHECK-ISEL-NEXT: cmp x9, x8 -; CHECK-ISEL-NEXT: csel x8, x9, x8, lt -; CHECK-ISEL-NEXT: cmp x11, x10 -; CHECK-ISEL-NEXT: csel x9, x11, x10, lt -; CHECK-ISEL-NEXT: fmov d0, x9 -; CHECK-ISEL-NEXT: mov v0.d[1], x8 +; CHECK-ISEL-NEXT: cmgt v2.2d, v1.2d, v0.2d +; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-ISEL-NEXT: ret ; ; CHECK-GLOBAL-LABEL: smin2i64: @@ -758,26 +711,10 @@ define void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) { ; CHECK-ISEL-LABEL: smin4i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: mov x8, v2.d[1] -; CHECK-ISEL-NEXT: mov x9, v0.d[1] -; CHECK-ISEL-NEXT: fmov x10, d2 -; CHECK-ISEL-NEXT: fmov x11, d0 -; CHECK-ISEL-NEXT: cmp 
x9, x8 -; CHECK-ISEL-NEXT: csel x8, x9, x8, lt -; CHECK-ISEL-NEXT: cmp x11, x10 -; CHECK-ISEL-NEXT: mov x9, v3.d[1] -; CHECK-ISEL-NEXT: csel x10, x11, x10, lt -; CHECK-ISEL-NEXT: mov x11, v1.d[1] -; CHECK-ISEL-NEXT: cmp x11, x9 -; CHECK-ISEL-NEXT: fmov d0, x10 -; CHECK-ISEL-NEXT: fmov x10, d3 -; CHECK-ISEL-NEXT: csel x9, x11, x9, lt -; CHECK-ISEL-NEXT: fmov x11, d1 -; CHECK-ISEL-NEXT: cmp x11, x10 -; CHECK-ISEL-NEXT: csel x10, x11, x10, lt -; CHECK-ISEL-NEXT: fmov d1, x10 -; CHECK-ISEL-NEXT: mov v0.d[1], x8 -; CHECK-ISEL-NEXT: mov v1.d[1], x9 +; CHECK-ISEL-NEXT: cmgt v4.2d, v2.2d, v0.2d +; CHECK-ISEL-NEXT: cmgt v5.2d, v3.2d, v1.2d +; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b ; CHECK-ISEL-NEXT: stp q0, q1, [x0] ; CHECK-ISEL-NEXT: ret ; @@ -977,13 +914,8 @@ define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-ISEL-LABEL: umin1i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-ISEL-NEXT: fmov x8, d1 -; CHECK-ISEL-NEXT: fmov x9, d0 -; CHECK-ISEL-NEXT: cmp x9, x8 -; CHECK-ISEL-NEXT: csel x8, x9, x8, lo -; CHECK-ISEL-NEXT: fmov d0, x8 +; CHECK-ISEL-NEXT: cmhi d2, d1, d0 +; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-ISEL-NEXT: ret ; ; CHECK-GLOBAL-LABEL: umin1i64: @@ -1002,8 +934,8 @@ define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-ISEL-LABEL: umin2i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: uqsub v1.2d, v0.2d, v1.2d -; CHECK-ISEL-NEXT: sub v0.2d, v0.2d, v1.2d +; CHECK-ISEL-NEXT: cmhi v2.2d, v1.2d, v0.2d +; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-ISEL-NEXT: ret ; ; CHECK-GLOBAL-LABEL: umin2i64: @@ -1022,10 +954,10 @@ define void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) { ; CHECK-ISEL-LABEL: umin4i64: ; CHECK-ISEL: // %bb.0: -; CHECK-ISEL-NEXT: uqsub v2.2d, v0.2d, v2.2d -; CHECK-ISEL-NEXT: uqsub v3.2d, v1.2d, v3.2d -; CHECK-ISEL-NEXT: sub v0.2d, v0.2d, v2.2d -; 
CHECK-ISEL-NEXT: sub v1.2d, v1.2d, v3.2d +; CHECK-ISEL-NEXT: cmhi v4.2d, v2.2d, v0.2d +; CHECK-ISEL-NEXT: cmhi v5.2d, v3.2d, v1.2d +; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b ; CHECK-ISEL-NEXT: stp q0, q1, [x0] ; CHECK-ISEL-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/minmax.ll b/llvm/test/CodeGen/AArch64/minmax.ll --- a/llvm/test/CodeGen/AArch64/minmax.ll +++ b/llvm/test/CodeGen/AArch64/minmax.ll @@ -160,10 +160,10 @@ define <4 x i64> @t15(<4 x i64> %a, <4 x i64> %b) { ; CHECK-LABEL: t15: ; CHECK: // %bb.0: -; CHECK-NEXT: cmhs v4.2d, v3.2d, v1.2d -; CHECK-NEXT: cmhs v5.2d, v2.2d, v0.2d -; CHECK-NEXT: bif v0.16b, v2.16b, v5.16b -; CHECK-NEXT: bif v1.16b, v3.16b, v4.16b +; CHECK-NEXT: cmhi v4.2d, v2.2d, v0.2d +; CHECK-NEXT: cmhi v5.2d, v3.2d, v1.2d +; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-NEXT: bif v1.16b, v3.16b, v5.16b ; CHECK-NEXT: ret %t1 = icmp ule <4 x i64> %a, %b %t2 = select <4 x i1> %t1, <4 x i64> %a, <4 x i64> %b diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll @@ -87,11 +87,11 @@ define i64 @test_v2i64(<2 x i64> %a) nounwind { ; CHECK-LABEL: test_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, v0.d[1] -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: csel x0, x9, x8, hi -; CHECK-NEXT: ret +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: cmhi d2, d0, d1 +; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %b = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a) ret i64 %b }