Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3422,30 +3422,54 @@ if (DstVT.isScalableVector()) return SDValue(); - // TODO: Saturate to SatWidth explicitly. - if (SatWidth != DstElementWidth) - return SDValue(); - EVT SrcElementVT = SrcVT.getVectorElementType(); - // In the absence of FP16 support, promote f16 to f32, like - // LowerVectorFP_TO_INT(). - if (SrcElementVT == MVT::f16 && !Subtarget->hasFullFP16()) { + // In the absence of FP16 support, promote f16 to f32 and saturate the result. + if (SrcElementVT == MVT::f16 && + (!Subtarget->hasFullFP16() || DstElementWidth > 16)) { MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements()); - return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal), - Op.getOperand(1)); - } + SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal); + SrcVT = F32VT; + SrcElementVT = MVT::f32; + SrcElementWidth = 32; + } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 && + SrcElementVT != MVT::f16) + return SDValue(); + SDLoc DL(Op); // Cases that we can emit directly. - if ((SrcElementWidth == DstElementWidth) && - (SrcElementVT == MVT::f64 || SrcElementVT == MVT::f32 || - (SrcElementVT == MVT::f16 && Subtarget->hasFullFP16()))) { - return Op; + if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) + return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, + DAG.getValueType(DstVT.getScalarType())); + + // Otherwise we emit a cvt that saturates to a higher BW, and saturate the + // result. This is only valid if the legal cvt is larger than the saturate + // width. For double, as we don't have MIN/MAX, it can be simpler to scalarize + // (at least until sqxtn is selected). + if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64) + return SDValue(); + + EVT IntVT = SrcVT.changeVectorElementTypeToInteger(); + SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal, + DAG.getValueType(IntVT.getScalarType())); + SDValue Sat; + if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) { + SDValue MinC = DAG.getConstant( + APInt::getSignedMaxValue(SatWidth).sextOrSelf(SrcElementWidth), DL, + IntVT); + SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC); + SDValue MaxC = DAG.getConstant( + APInt::getSignedMinValue(SatWidth).sextOrSelf(SrcElementWidth), DL, + IntVT); + Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC); + } else { + SDValue MinC = DAG.getConstant( + APInt::getAllOnesValue(SatWidth).zextOrSelf(SrcElementWidth), DL, + IntVT); + Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC); } - // For all other cases, fall back on the expanded form. - return SDValue(); + return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat); } SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, Index: llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -582,224 +582,98 @@ } define <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v2f16_v2i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_signed_v2f16_v2i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: fcvtzs w8, h0 -; CHECK-FP16-NEXT: fmov s0, w8 -; CHECK-FP16-NEXT: fcvtzs w8, h1 -; CHECK-FP16-NEXT: mov v0.s[1], w8 -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_signed_v2f16_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptosi.sat.v2f16.v2i32(<2 x half> %f) ret <2 x i32> %x } define <3 x i32> @test_signed_v3f16_v3i32(<3 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v3f16_v3i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_signed_v3f16_v3i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: fcvtzs w8, h0 -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzs w8, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov v1.s[1], w8 -; CHECK-FP16-NEXT: fcvtzs w8, h2 -; CHECK-FP16-NEXT: mov v1.s[2], w8 -; CHECK-FP16-NEXT: fcvtzs w8, h0 -; CHECK-FP16-NEXT: mov v1.s[3], w8 -; CHECK-FP16-NEXT: mov v0.16b, v1.16b -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_signed_v3f16_v3i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptosi.sat.v3f16.v3i32(<3 x half> %f) ret <3 x i32> %x } define <4 x i32> @test_signed_v4f16_v4i32(<4 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v4f16_v4i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_signed_v4f16_v4i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: fcvtzs w8, h0 -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzs w8, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov v1.s[1], w8 -; CHECK-FP16-NEXT: fcvtzs w8, h2 -; CHECK-FP16-NEXT: mov v1.s[2], w8 -; CHECK-FP16-NEXT: fcvtzs w8, h0 -; CHECK-FP16-NEXT: mov v1.s[3], w8 -; CHECK-FP16-NEXT: mov v0.16b, v1.16b -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_signed_v4f16_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptosi.sat.v4f16.v4i32(<4 x half> %f) ret <4 x i32> %x } define <5 x i32> @test_signed_v5f16_v5i32(<5 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v5f16_v5i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: mov w1, v1.s[1] -; CHECK-CVT-NEXT: mov w2, v1.s[2] -; CHECK-CVT-NEXT: mov w3, v1.s[3] -; CHECK-CVT-NEXT: fmov w0, s1 -; CHECK-CVT-NEXT: fmov w4, s0 -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_signed_v5f16_v5i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h4, v0.h[3] -; CHECK-FP16-NEXT: fcvtzs w0, h0 -; CHECK-FP16-NEXT: fcvtzs w1, h1 -; CHECK-FP16-NEXT: fcvtzs w2, h2 -; CHECK-FP16-NEXT: fcvtzs w4, h3 -; CHECK-FP16-NEXT: fcvtzs w3, h4 -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_signed_v5f16_v5i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: mov w1, v1.s[1] +; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mov w3, v1.s[3] +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptosi.sat.v5f16.v5i32(<5 x half> %f) ret <5 x i32> %x } define <6 x i32> @test_signed_v6f16_v6i32(<6 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v6f16_v6i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: mov w1, v1.s[1] -; CHECK-CVT-NEXT: mov w2, v1.s[2] -; CHECK-CVT-NEXT: mov w3, v1.s[3] -; CHECK-CVT-NEXT: mov w5, v0.s[1] -; CHECK-CVT-NEXT: fmov w0, s1 -; CHECK-CVT-NEXT: fmov w4, s0 -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_signed_v6f16_v6i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h4, v0.h[3] -; CHECK-FP16-NEXT: fcvtzs w0, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] -; CHECK-FP16-NEXT: fcvtzs w8, h1 -; CHECK-FP16-NEXT: fcvtzs w2, h3 -; CHECK-FP16-NEXT: fcvtzs w3, h4 -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzs w5, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: mov v1.s[1], w5 -; CHECK-FP16-NEXT: fcvtzs w1, h2 -; CHECK-FP16-NEXT: fmov w4, s1 -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_signed_v6f16_v6i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: mov w1, v1.s[1] +; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mov w3, v1.s[3] +; CHECK-NEXT: mov w5, v0.s[1] +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptosi.sat.v6f16.v6i32(<6 x half> %f) ret <6 x i32> %x } define <7 x i32> @test_signed_v7f16_v7i32(<7 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v7f16_v7i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: mov w1, v1.s[1] -; CHECK-CVT-NEXT: mov w2, v1.s[2] -; CHECK-CVT-NEXT: mov w3, v1.s[3] -; CHECK-CVT-NEXT: mov w5, v0.s[1] -; CHECK-CVT-NEXT: mov w6, v0.s[2] -; CHECK-CVT-NEXT: fmov w0, s1 -; CHECK-CVT-NEXT: fmov w4, s0 -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_signed_v7f16_v7i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h4, v0.h[3] -; CHECK-FP16-NEXT: fcvtzs w0, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] -; CHECK-FP16-NEXT: fcvtzs w8, h1 -; CHECK-FP16-NEXT: mov h1, v1.h[2] -; CHECK-FP16-NEXT: fcvtzs w3, h4 -; CHECK-FP16-NEXT: fmov s3, w8 -; CHECK-FP16-NEXT: fcvtzs w8, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: fcvtzs w6, h1 -; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: mov v3.s[1], w8 -; CHECK-FP16-NEXT: fcvtzs w2, h2 -; CHECK-FP16-NEXT: fcvtzs w1, h1 -; CHECK-FP16-NEXT: mov v3.s[2], w6 -; CHECK-FP16-NEXT: mov w5, v3.s[1] -; CHECK-FP16-NEXT: fmov w4, s3 -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_signed_v7f16_v7i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: mov w1, v1.s[1] +; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mov w3, v1.s[3] +; CHECK-NEXT: mov w5, v0.s[1] +; CHECK-NEXT: mov w6, v0.s[2] +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptosi.sat.v7f16.v7i32(<7 x half> %f) ret <7 x i32> %x } define <8 x i32> @test_signed_v8f16_v8i32(<8 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v8f16_v8i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_signed_v8f16_v8i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h4, v0.h[1] -; CHECK-FP16-NEXT: fcvtzs w9, h0 -; CHECK-FP16-NEXT: mov h2, v3.h[1] -; CHECK-FP16-NEXT: fcvtzs w8, h3 -; CHECK-FP16-NEXT: mov h5, v3.h[2] -; CHECK-FP16-NEXT: mov h3, v3.h[3] -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzs w8, h2 -; CHECK-FP16-NEXT: fmov s2, w9 -; CHECK-FP16-NEXT: fcvtzs w9, h4 -; CHECK-FP16-NEXT: mov h4, v0.h[2] -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov v1.s[1], w8 -; CHECK-FP16-NEXT: fcvtzs w8, h5 -; CHECK-FP16-NEXT: mov v2.s[1], w9 -; CHECK-FP16-NEXT: fcvtzs w9, h4 -; CHECK-FP16-NEXT: mov v1.s[2], w8 -; CHECK-FP16-NEXT: fcvtzs w8, h3 -; CHECK-FP16-NEXT: mov v2.s[2], w9 -; CHECK-FP16-NEXT: fcvtzs w9, h0 -; CHECK-FP16-NEXT: mov v1.s[3], w8 -; CHECK-FP16-NEXT: mov v2.s[3], w9 -; CHECK-FP16-NEXT: mov v0.16b, v2.16b -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_signed_v8f16_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret %x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f) ret <8 x i32> %x } @@ -821,23 +695,11 @@ define <2 x i1> @test_signed_v2f32_v2i1(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, #-1.00000000 -; CHECK-NEXT: movi d3, #0000000000000000 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fminnm s4, s4, s3 -; CHECK-NEXT: fminnm s1, s2, s3 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i1> @llvm.fptosi.sat.v2f32.v2i1(<2 x float> %f) ret <2 x i1> %x @@ -846,25 +708,11 @@ define <2 x i8> @test_signed_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1023410176 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #1123942400 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2s, #127 +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: mvni v1.2s, #127 +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptosi.sat.v2f32.v2i8(<2 x float> %f) ret <2 x i8> %x @@ -873,26 +721,11 @@ define <2 x i13> @test_signed_v2f32_v2i13(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-981467136 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #61440 -; CHECK-NEXT: movk w8, #17791, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2s, #15, msl #8 +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: mvni v1.2s, #15, msl #8 +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i13> @llvm.fptosi.sat.v2f32.v2i13(<2 x float> %f) ret <2 x i13> %x @@ -901,26 +734,11 @@ define <2 x i16> @test_signed_v2f32_v2i16(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-956301312 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #65024 -; CHECK-NEXT: movk w8, #18175, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2s, #127, msl #8 +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: mvni v1.2s, #127, msl #8 +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i16> @llvm.fptosi.sat.v2f32.v2i16(<2 x float> %f) ret <2 x i16> %x @@ -929,26 +747,11 @@ define <2 x i19> @test_signed_v2f32_v2i19(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-931135488 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #65472 -; CHECK-NEXT: movk w8, #18559, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2s, #3, msl #16 +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: mvni v1.2s, #3, msl #16 +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i19> @llvm.fptosi.sat.v2f32.v2i19(<2 x float> %f) ret <2 x i19> %x @@ -1160,36 +963,12 @@ define <4 x i1> @test_signed_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, #-1.00000000 -; CHECK-NEXT: movi d4, #0000000000000000 -; CHECK-NEXT: mov s6, v0.s[2] -; CHECK-NEXT: fmaxnm s5, s0, s2 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s5, s5, s4 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzs w9, s5 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s2, s3, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %x = call <4 x i1> @llvm.fptosi.sat.v4f32.v4i1(<4 x float> %f) ret <4 x i1> %x @@ -1198,38 +977,12 @@ define <4 x i8> @test_signed_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1023410176 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: mov s6, v0.s[2] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #1123942400 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s5, s0, s2 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s5, s5, s4 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzs w9, s5 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s2, s3, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.4s, #127 +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mvni v1.4s, #127 +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %x = call <4 x i8> @llvm.fptosi.sat.v4f32.v4i8(<4 x float> %f) ret <4 x i8> %x @@ -1238,39 +991,12 @@ define <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-981467136 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: mov s6, v0.s[2] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #61440 -; CHECK-NEXT: movk w8, #17791, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s5, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s5, s5, s4 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzs w9, s5 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s2, s3, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.4s, #15, msl #8 +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mvni v1.4s, #15, msl #8 +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %x = call <4 x i13> @llvm.fptosi.sat.v4f32.v4i13(<4 x float> %f) ret <4 x i13> %x @@ -1279,39 +1005,8 @@ define <4 x i16> @test_signed_v4f32_v4i16(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-956301312 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: mov s6, v0.s[2] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #65024 -; CHECK-NEXT: movk w8, #18175, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s5, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s5, s5, s4 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzs w9, s5 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s2, s3, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: sqxtn v0.4h, v0.4s ; CHECK-NEXT: ret %x = call <4 x i16> @llvm.fptosi.sat.v4f32.v4i16(<4 x float> %f) ret <4 x i16> %x @@ -1320,38 +1015,11 @@ define <4 x i19> @test_signed_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-931135488 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: mov s6, v0.s[2] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #65472 -; CHECK-NEXT: movk w8, #18559, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s5, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s5, s5, s4 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzs w9, s5 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s2, s3, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: movi v1.4s, #3, msl #16 +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mvni v1.4s, #3, msl #16 +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %x = call <4 x i19> @llvm.fptosi.sat.v4f32.v4i19(<4 x float> %f) ret <4 x i19> %x @@ -1982,135 +1650,73 @@ declare <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half>) define <4 x i1> @test_signed_v4f16_v4i1(<4 x half> %f) { -; CHECK-LABEL: test_signed_v4f16_v4i1: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: fmov s2, #-1.00000000 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: movi d5, #0000000000000000 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fmaxnm s7, s3, s2 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fminnm s7, s7, s5 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fcvtzs w9, s7 -; CHECK-NEXT: fminnm s1, s1, s5 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fmaxnm s2, s4, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s5 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v4f16_v4i1: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: movi v1.2d, #0000000000000000 +; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v4f16_v4i1: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: ret %x = call <4 x i1> @llvm.fptosi.sat.v4f16.v4i1(<4 x half> %f) ret <4 x i1> %x } define <4 x i8> @test_signed_v4f16_v4i8(<4 x half> %f) { -; CHECK-LABEL: test_signed_v4f16_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: mov w8, #-1023410176 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #1123942400 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: fmaxnm s7, s3, s2 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s7, s7, s5 -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fminnm s1, s1, s5 -; CHECK-NEXT: fcvtzs w9, s7 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fmaxnm s2, s4, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s5 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v4f16_v4i8: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: movi v1.4s, #127 +; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: mvni v1.4s, #127 +; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v4f16_v4i8: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: movi v1.4h, #127 +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: mvni v1.4h, #127 +; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: ret %x = call <4 x i8> @llvm.fptosi.sat.v4f16.v4i8(<4 x half> %f) ret <4 x i8> %x } define <4 x i13> @test_signed_v4f16_v4i13(<4 x half> %f) { -; CHECK-LABEL: test_signed_v4f16_v4i13: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: mov w8, #-981467136 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #61440 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: movk w8, #17791, lsl #16 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmaxnm s7, s3, s2 -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s7, s7, s5 -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fminnm s1, s1, s5 -; CHECK-NEXT: fcvtzs w9, s7 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fmaxnm s2, s4, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s5 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v4f16_v4i13: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: movi v1.4s, #15, msl #8 +; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: mvni v1.4s, #15, msl #8 +; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v4f16_v4i13: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: mvni v1.4h, #240, lsl #8 +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: movi v1.4h, #240, lsl #8 +; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: ret %x = call <4 x i13> @llvm.fptosi.sat.v4f16.v4i13(<4 x half> %f) ret <4 x i13> %x } @@ -2119,39 +1725,8 @@ ; CHECK-CVT-LABEL: test_signed_v4f16_v4i16: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #-956301312 -; CHECK-CVT-NEXT: fmov s2, w8 -; CHECK-CVT-NEXT: mov w8, #65024 -; CHECK-CVT-NEXT: mov s1, v0.s[1] -; CHECK-CVT-NEXT: movk w8, #18175, lsl #16 -; CHECK-CVT-NEXT: mov s6, v0.s[2] -; CHECK-CVT-NEXT: fmaxnm s5, s0, s2 -; CHECK-CVT-NEXT: fmov s4, w8 -; CHECK-CVT-NEXT: fmaxnm s3, s1, s2 -; CHECK-CVT-NEXT: fcmp s1, s1 -; CHECK-CVT-NEXT: fmaxnm s1, s6, s2 -; CHECK-CVT-NEXT: fminnm s5, s5, s4 -; CHECK-CVT-NEXT: fminnm s3, s3, s4 -; CHECK-CVT-NEXT: fminnm s1, s1, s4 -; CHECK-CVT-NEXT: fcvtzs w9, s5 -; CHECK-CVT-NEXT: fcvtzs w8, s3 -; CHECK-CVT-NEXT: mov s3, v0.s[3] -; CHECK-CVT-NEXT: csel w8, wzr, w8, vs -; CHECK-CVT-NEXT: fcmp s0, s0 -; CHECK-CVT-NEXT: fmaxnm s2, s3, s2 -; CHECK-CVT-NEXT: csel w9, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s6, s6 -; CHECK-CVT-NEXT: fmov s0, w9 -; CHECK-CVT-NEXT: fcvtzs w9, s1 -; CHECK-CVT-NEXT: fminnm s1, s2, s4 -; CHECK-CVT-NEXT: mov v0.h[1], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s3, s3 -; CHECK-CVT-NEXT: fcvtzs w9, s1 -; CHECK-CVT-NEXT: mov v0.h[2], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: mov v0.h[3], w8 -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-NEXT: sqxtn v0.4h, v0.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i16: @@ -2165,71 +1740,23 @@ define <4 x i19> @test_signed_v4f16_v4i19(<4 x half> %f) { ; CHECK-LABEL: test_signed_v4f16_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: mov w8, #-931135488 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #65472 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: movk w8, #18559, lsl #16 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmaxnm s7, s3, s2 -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s7, s7, s5 -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fminnm s1, s1, s5 -; CHECK-NEXT: fcvtzs w9, s7 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fmaxnm s2, s4, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s5 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: movi v1.4s, #3, msl #16 +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mvni v1.4s, #3, msl #16 +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %x = call <4 x i19> @llvm.fptosi.sat.v4f16.v4i19(<4 x half> %f) ret <4 x i19> %x } define <4 x i32> @test_signed_v4f16_v4i32_duplicate(<4 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v4f16_v4i32_duplicate: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_signed_v4f16_v4i32_duplicate: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: fcvtzs w8, h0 -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzs w8, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov v1.s[1], w8 -; CHECK-FP16-NEXT: fcvtzs w8, h2 -; CHECK-FP16-NEXT: mov v1.s[2], w8 -; CHECK-FP16-NEXT: fcvtzs w8, h0 -; CHECK-FP16-NEXT: mov v1.s[3], w8 -; CHECK-FP16-NEXT: mov v0.16b, v1.16b -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_signed_v4f16_v4i32_duplicate: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptosi.sat.v4f16.v4i32(<4 x half> %f) ret <4 x i32> %x } @@ -2555,75 +2082,78 @@ declare <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half>) define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: fmov s2, #-1.00000000 -; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: movi d3, #0000000000000000 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h7, v0.h[3] -; CHECK-NEXT: mov h17, v0.h[4] -; CHECK-NEXT: mov h18, v0.h[6] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fmaxnm s16, s4, s2 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: fmaxnm s5, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fminnm s16, s16, s3 -; CHECK-NEXT: mov h1, v0.h[5] -; CHECK-NEXT: mov h0, v0.h[7] -; CHECK-NEXT: fminnm s5, s5, s3 -; CHECK-NEXT: fcvtzs w9, s16 -; CHECK-NEXT: fmaxnm s16, s7, s2 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvtzs w8, s5 -; CHECK-NEXT: fmaxnm s5, s6, s2 -; CHECK-NEXT: fminnm s16, s16, s3 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fminnm s4, s5, s3 -; CHECK-NEXT: fcvt s5, h17 -; CHECK-NEXT: fcvt s17, h1 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fcvtzs w10, s4 -; CHECK-NEXT: fmaxnm s4, s5, s2 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: fcvtzs w9, s16 -; CHECK-NEXT: fmaxnm s6, s17, s2 -; CHECK-NEXT: mov v1.b[1], w8 -; CHECK-NEXT: csel w8, wzr, w10, vs -; CHECK-NEXT: fcmp s7, s7 -; CHECK-NEXT: fcvt s7, h18 -; CHECK-NEXT: fminnm s4, s4, s3 -; CHECK-NEXT: fminnm s6, s6, s3 -; CHECK-NEXT: mov v1.b[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s5, s5 -; CHECK-NEXT: fcvtzs w9, s4 -; CHECK-NEXT: fmaxnm s4, s7, s2 -; CHECK-NEXT: fcvtzs w10, s6 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: mov v1.b[3], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s17, s17 -; CHECK-NEXT: fminnm s4, s4, s3 -; CHECK-NEXT: fminnm s2, s2, s3 -; CHECK-NEXT: mov v1.b[4], w8 -; CHECK-NEXT: csel w8, wzr, w10, vs -; CHECK-NEXT: fcmp s7, s7 -; CHECK-NEXT: fcvtzs w9, s4 -; CHECK-NEXT: mov v1.b[5], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcvtzs w9, s2 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: mov v1.b[6], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v1.b[7], w8 -; CHECK-NEXT: fmov d0, d1 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v8f16_v8i1: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: fcvtzs w9, s1 +; CHECK-CVT-NEXT: fcvtzs w13, s0 +; CHECK-CVT-NEXT: fcvtzs w8, s2 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: csel w8, w8, wzr, lt +; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: csel w9, w9, wzr, lt +; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge +; CHECK-CVT-NEXT: cmp w10, #0 +; CHECK-CVT-NEXT: csel w10, w10, wzr, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmp w10, #0 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge +; CHECK-CVT-NEXT: cmp w11, #0 +; CHECK-CVT-NEXT: csel w11, w11, wzr, lt +; CHECK-CVT-NEXT: fcvtzs w14, s1 +; CHECK-CVT-NEXT: cmp w11, #0 +; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge +; CHECK-CVT-NEXT: cmp w12, #0 +; CHECK-CVT-NEXT: csel w12, w12, wzr, lt +; CHECK-CVT-NEXT: cmp w12, #0 +; CHECK-CVT-NEXT: csinv w12, w12, wzr, ge +; CHECK-CVT-NEXT: cmp w13, #0 +; CHECK-CVT-NEXT: csel w13, w13, wzr, lt +; CHECK-CVT-NEXT: cmp w13, #0 +; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge +; CHECK-CVT-NEXT: cmp w14, #0 +; CHECK-CVT-NEXT: csel w9, w14, wzr, lt +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: fmov s1, w13 +; CHECK-CVT-NEXT: fcvtzs w13, s0 +; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge +; CHECK-CVT-NEXT: mov v2.s[1], w8 +; CHECK-CVT-NEXT: mov v1.s[1], w12 +; CHECK-CVT-NEXT: cmp w13, #0 +; CHECK-CVT-NEXT: csel w8, w13, wzr, lt +; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge +; CHECK-CVT-NEXT: mov v1.s[2], w9 +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v2.s[3], w11 +; CHECK-CVT-NEXT: xtn v0.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v8f16_v8i1: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: xtn v0.8b, v0.8h +; CHECK-FP16-NEXT: ret %x = call <8 x i1> @llvm.fptosi.sat.v8f16.v8i1(<8 x half> %f) ret <8 x i1> %x } @@ -2631,217 +2161,151 @@ define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) { ; CHECK-CVT-LABEL: test_signed_v8f16_v8i8: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v3.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #-1023410176 -; CHECK-CVT-NEXT: fmov s1, w8 -; CHECK-CVT-NEXT: mov w8, #1123942400 -; CHECK-CVT-NEXT: mov s4, v3.s[1] -; CHECK-CVT-NEXT: mov s7, v3.s[2] -; CHECK-CVT-NEXT: mov s16, v3.s[3] -; CHECK-CVT-NEXT: fmov s2, w8 -; CHECK-CVT-NEXT: fmaxnm s6, s3, s1 -; CHECK-CVT-NEXT: fmaxnm s5, s4, s1 -; CHECK-CVT-NEXT: fcmp s4, s4 -; CHECK-CVT-NEXT: fcvtl2 v4.4s, v0.8h -; CHECK-CVT-NEXT: fmaxnm s0, s7, s1 -; CHECK-CVT-NEXT: fminnm s6, s6, s2 -; CHECK-CVT-NEXT: fminnm s5, s5, s2 -; CHECK-CVT-NEXT: fminnm s0, s0, s2 -; CHECK-CVT-NEXT: fcvtzs w9, s6 -; CHECK-CVT-NEXT: fmaxnm s6, s4, s1 -; CHECK-CVT-NEXT: fcvtzs w8, s5 -; CHECK-CVT-NEXT: fmaxnm s5, s16, s1 -; CHECK-CVT-NEXT: fcvtzs w10, s0 -; CHECK-CVT-NEXT: fminnm s6, s6, s2 -; CHECK-CVT-NEXT: csel w8, wzr, w8, vs -; CHECK-CVT-NEXT: fcmp s3, s3 -; CHECK-CVT-NEXT: mov s3, v4.s[1] -; CHECK-CVT-NEXT: fminnm s5, s5, s2 -; CHECK-CVT-NEXT: csel w9, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s7, s7 -; CHECK-CVT-NEXT: mov s7, v4.s[2] -; CHECK-CVT-NEXT: fmov s0, w9 -; CHECK-CVT-NEXT: fcvtzs w9, s5 -; CHECK-CVT-NEXT: fmaxnm s5, s3, s1 -; CHECK-CVT-NEXT: mov v0.b[1], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w10, vs -; CHECK-CVT-NEXT: fcmp s16, s16 -; CHECK-CVT-NEXT: fminnm s5, s5, s2 -; CHECK-CVT-NEXT: mov v0.b[2], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcvtzs w9, s6 -; CHECK-CVT-NEXT: fmaxnm s6, s7, s1 -; CHECK-CVT-NEXT: fcmp s4, s4 -; CHECK-CVT-NEXT: mov s4, v4.s[3] -; CHECK-CVT-NEXT: fcvtzs w10, s5 -; CHECK-CVT-NEXT: mov v0.b[3], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s3, s3 -; CHECK-CVT-NEXT: fminnm s5, s6, s2 -; CHECK-CVT-NEXT: fmaxnm s1, s4, s1 -; CHECK-CVT-NEXT: mov v0.b[4], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w10, vs -; CHECK-CVT-NEXT: fcmp s7, s7 -; CHECK-CVT-NEXT: fcvtzs w9, s5 -; CHECK-CVT-NEXT: fminnm s1, s1, s2 -; CHECK-CVT-NEXT: mov v0.b[5], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s4, s4 -; CHECK-CVT-NEXT: fcvtzs w9, s1 -; CHECK-CVT-NEXT: mov v0.b[6], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: mov v0.b[7], w8 -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: mov w8, #127 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w10, #-128 +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: fcvtzs w15, s0 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: cmp w9, #127 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmn w9, #128 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: cmp w11, #127 +; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmp w12, #127 +; CHECK-CVT-NEXT: csel w12, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: cmn w12, #128 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w12, w12, w10, gt +; CHECK-CVT-NEXT: cmp w13, #127 +; CHECK-CVT-NEXT: csel w13, w13, w8, lt +; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: cmn w13, #128 +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: csel w13, w13, w10, gt +; CHECK-CVT-NEXT: cmp w14, #127 +; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: cmn w14, #128 +; CHECK-CVT-NEXT: csel w14, w14, w10, gt +; CHECK-CVT-NEXT: cmp w15, #127 +; CHECK-CVT-NEXT: csel w15, w15, w8, lt +; CHECK-CVT-NEXT: cmn w15, #128 +; CHECK-CVT-NEXT: csel w15, w15, w10, gt +; CHECK-CVT-NEXT: cmp w16, #127 +; CHECK-CVT-NEXT: csel w11, w16, w8, lt +; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: fmov s1, w15 +; CHECK-CVT-NEXT: fcvtzs w15, s0 +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: cmp w15, #127 +; CHECK-CVT-NEXT: csel w8, w15, w8, lt +; CHECK-CVT-NEXT: cmn w8, #128 +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: mov v1.s[2], w11 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: mov w8, #-1023410176 -; CHECK-FP16-NEXT: fcvt s4, h0 -; CHECK-FP16-NEXT: mov h6, v0.h[2] -; CHECK-FP16-NEXT: mov h7, v0.h[3] -; CHECK-FP16-NEXT: mov h17, v0.h[4] -; CHECK-FP16-NEXT: fmov s2, w8 -; CHECK-FP16-NEXT: mov w8, #1123942400 -; CHECK-FP16-NEXT: fcvt s1, h1 -; CHECK-FP16-NEXT: mov h18, v0.h[6] -; CHECK-FP16-NEXT: fcvt s6, h6 -; CHECK-FP16-NEXT: fmov s3, w8 -; CHECK-FP16-NEXT: fmaxnm s16, s4, s2 -; CHECK-FP16-NEXT: fcvt s7, h7 -; CHECK-FP16-NEXT: fmaxnm s5, s1, s2 -; CHECK-FP16-NEXT: fcmp s1, s1 -; CHECK-FP16-NEXT: mov h1, v0.h[5] -; CHECK-FP16-NEXT: mov h0, v0.h[7] -; CHECK-FP16-NEXT: fminnm s16, s16, s3 -; CHECK-FP16-NEXT: fminnm s5, s5, s3 -; CHECK-FP16-NEXT: fcvt s0, h0 -; CHECK-FP16-NEXT: fcvtzs w9, s16 -; CHECK-FP16-NEXT: fmaxnm s16, s7, s2 -; CHECK-FP16-NEXT: fcvtzs w8, s5 -; CHECK-FP16-NEXT: fmaxnm s5, s6, s2 -; CHECK-FP16-NEXT: fminnm s16, s16, s3 -; CHECK-FP16-NEXT: csel w8, wzr, w8, vs -; CHECK-FP16-NEXT: fcmp s4, s4 -; CHECK-FP16-NEXT: fminnm s4, s5, s3 -; CHECK-FP16-NEXT: fcvt s5, h17 -; CHECK-FP16-NEXT: fcvt s17, h1 -; CHECK-FP16-NEXT: csel w9, wzr, w9, vs -; CHECK-FP16-NEXT: fcmp s6, s6 -; CHECK-FP16-NEXT: fcvtzs w10, s4 -; CHECK-FP16-NEXT: fmaxnm s4, s5, s2 -; CHECK-FP16-NEXT: fmov s1, w9 -; CHECK-FP16-NEXT: fcvtzs w9, s16 -; CHECK-FP16-NEXT: fmaxnm s6, s17, s2 -; CHECK-FP16-NEXT: mov v1.b[1], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w10, vs -; CHECK-FP16-NEXT: fcmp s7, s7 -; CHECK-FP16-NEXT: fcvt s7, h18 -; CHECK-FP16-NEXT: fminnm s4, s4, s3 -; CHECK-FP16-NEXT: fminnm s6, s6, s3 -; CHECK-FP16-NEXT: mov v1.b[2], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w9, vs -; CHECK-FP16-NEXT: fcmp s5, s5 -; CHECK-FP16-NEXT: fcvtzs w9, s4 -; CHECK-FP16-NEXT: fmaxnm s4, s7, s2 -; CHECK-FP16-NEXT: fcvtzs w10, s6 -; CHECK-FP16-NEXT: fmaxnm s2, s0, s2 -; CHECK-FP16-NEXT: mov v1.b[3], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w9, vs -; CHECK-FP16-NEXT: fcmp s17, s17 -; CHECK-FP16-NEXT: fminnm s4, s4, s3 -; CHECK-FP16-NEXT: fminnm s2, s2, s3 -; CHECK-FP16-NEXT: mov v1.b[4], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w10, vs -; CHECK-FP16-NEXT: fcmp s7, s7 -; CHECK-FP16-NEXT: fcvtzs w9, s4 -; CHECK-FP16-NEXT: mov v1.b[5], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w9, vs -; CHECK-FP16-NEXT: fcvtzs w9, s2 -; CHECK-FP16-NEXT: fcmp s0, s0 -; CHECK-FP16-NEXT: mov v1.b[6], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w9, vs -; CHECK-FP16-NEXT: mov v1.b[7], w8 -; CHECK-FP16-NEXT: fmov d0, d1 +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: sqxtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret %x = call <8 x i8> @llvm.fptosi.sat.v8f16.v8i8(<8 x half> %f) ret <8 x i8> %x } define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i13: -; CHECK: // %bb.0: -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: mov w8, #-981467136 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h7, v0.h[3] -; CHECK-NEXT: mov h17, v0.h[4] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #61440 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: movk w8, #17791, lsl #16 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: mov h18, v0.h[6] -; CHECK-NEXT: fmaxnm s16, s3, s2 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: mov h1, v0.h[5] -; CHECK-NEXT: mov h0, v0.h[7] -; CHECK-NEXT: fminnm s16, s16, s5 -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvtzs w9, s16 -; CHECK-NEXT: fmaxnm s16, s7, s2 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fmaxnm s4, s6, s2 -; CHECK-NEXT: fminnm s16, s16, s5 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fminnm s3, s4, s5 -; CHECK-NEXT: fcvt s4, h17 -; CHECK-NEXT: fcvt s17, h1 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fcvtzs w10, s3 -; CHECK-NEXT: fmaxnm s3, s4, s2 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: fcvtzs w9, s16 -; CHECK-NEXT: fmaxnm s6, s17, s2 -; CHECK-NEXT: mov v1.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w10, vs -; CHECK-NEXT: fcmp s7, s7 -; CHECK-NEXT: fcvt s7, h18 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fminnm s6, s6, s5 -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs w9, s3 -; CHECK-NEXT: fmaxnm s3, s7, s2 -; CHECK-NEXT: fcvtzs w10, s6 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s17, s17 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fminnm s2, s2, s5 -; CHECK-NEXT: mov v1.h[4], w8 -; CHECK-NEXT: csel w8, wzr, w10, vs -; CHECK-NEXT: fcmp s7, s7 -; CHECK-NEXT: fcvtzs w9, s3 -; CHECK-NEXT: mov v1.h[5], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcvtzs w9, s2 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: mov v1.h[6], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v1.h[7], w8 -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v8f16_v8i13: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: mov w8, #4095 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w10, #-4096 +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: fcvtzs w15, s0 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: cmp w9, #4095 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: cmp w11, #4095 +; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmp w12, #4095 +; CHECK-CVT-NEXT: csel w12, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: cmn w12, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w12, w12, w10, gt +; CHECK-CVT-NEXT: cmp w13, #4095 +; CHECK-CVT-NEXT: csel w13, w13, w8, lt +; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: cmn w13, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: csel w13, w13, w10, gt +; CHECK-CVT-NEXT: cmp w14, #4095 +; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: cmn w14, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w14, w14, w10, gt +; CHECK-CVT-NEXT: cmp w15, #4095 +; CHECK-CVT-NEXT: csel w15, w15, w8, lt +; CHECK-CVT-NEXT: cmn w15, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w15, w15, w10, gt +; CHECK-CVT-NEXT: cmp w16, #4095 +; CHECK-CVT-NEXT: csel w11, w16, w8, lt +; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: fmov s1, w15 +; CHECK-CVT-NEXT: fcvtzs w15, s0 +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: cmp w15, #4095 +; CHECK-CVT-NEXT: csel w8, w15, w8, lt +; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: mov v1.s[2], w11 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v8f16_v8i13: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: mvni v1.8h, #240, lsl #8 +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: movi v1.8h, #240, lsl #8 +; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: ret %x = call <8 x i13> @llvm.fptosi.sat.v8f16.v8i13(<8 x half> %f) ret <8 x i13> %x } @@ -2849,67 +2313,66 @@ define <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) { ; CHECK-CVT-LABEL: test_signed_v8f16_v8i16: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #-956301312 -; CHECK-CVT-NEXT: fmov s3, w8 -; CHECK-CVT-NEXT: mov w8, #65024 +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: mov w8, #32767 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w10, #-32768 ; CHECK-CVT-NEXT: mov s2, v1.s[1] -; CHECK-CVT-NEXT: movk w8, #18175, lsl #16 -; CHECK-CVT-NEXT: mov s7, v1.s[2] -; CHECK-CVT-NEXT: mov s16, v1.s[3] -; CHECK-CVT-NEXT: fmaxnm s6, s1, s3 -; CHECK-CVT-NEXT: fmov s5, w8 -; CHECK-CVT-NEXT: fmaxnm s4, s2, s3 -; CHECK-CVT-NEXT: fcmp s2, s2 -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-CVT-NEXT: fmaxnm s0, s7, s3 -; CHECK-CVT-NEXT: fminnm s6, s6, s5 -; CHECK-CVT-NEXT: fminnm s4, s4, s5 -; CHECK-CVT-NEXT: fminnm s0, s0, s5 -; CHECK-CVT-NEXT: fcvtzs w9, s6 -; CHECK-CVT-NEXT: fmaxnm s6, s2, s3 -; CHECK-CVT-NEXT: fcvtzs w8, s4 -; CHECK-CVT-NEXT: fmaxnm s4, s16, s3 -; CHECK-CVT-NEXT: fcvtzs w10, s0 -; CHECK-CVT-NEXT: fminnm s6, s6, s5 -; CHECK-CVT-NEXT: csel w8, wzr, w8, vs -; CHECK-CVT-NEXT: fcmp s1, s1 -; CHECK-CVT-NEXT: mov s1, v2.s[1] -; CHECK-CVT-NEXT: fminnm s4, s4, s5 -; CHECK-CVT-NEXT: csel w9, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s7, s7 -; CHECK-CVT-NEXT: mov s7, v2.s[2] -; CHECK-CVT-NEXT: fmov s0, w9 -; CHECK-CVT-NEXT: fcvtzs w9, s4 -; CHECK-CVT-NEXT: fmaxnm s4, s1, s3 -; CHECK-CVT-NEXT: mov v0.h[1], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w10, vs -; CHECK-CVT-NEXT: fcmp s16, s16 -; CHECK-CVT-NEXT: fminnm s4, s4, s5 -; CHECK-CVT-NEXT: mov v0.h[2], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcvtzs w9, s6 -; CHECK-CVT-NEXT: fmaxnm s6, s7, s3 -; CHECK-CVT-NEXT: fcmp s2, s2 -; CHECK-CVT-NEXT: mov s2, v2.s[3] -; CHECK-CVT-NEXT: fcvtzs w10, s4 -; CHECK-CVT-NEXT: mov v0.h[3], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s1, s1 -; CHECK-CVT-NEXT: fminnm s4, s6, s5 -; CHECK-CVT-NEXT: fmaxnm s1, s2, s3 -; CHECK-CVT-NEXT: mov v0.h[4], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w10, vs -; CHECK-CVT-NEXT: fcmp s7, s7 -; CHECK-CVT-NEXT: fcvtzs w9, s4 -; CHECK-CVT-NEXT: fminnm s1, s1, s5 -; CHECK-CVT-NEXT: mov v0.h[5], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s2, s2 -; CHECK-CVT-NEXT: fcvtzs w9, s1 -; CHECK-CVT-NEXT: mov v0.h[6], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: mov v0.h[7], w8 +; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: fcvtzs w15, s0 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w12, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w12, w12, w10, gt +; CHECK-CVT-NEXT: cmp w13, w8 +; CHECK-CVT-NEXT: csel w13, w13, w8, lt +; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: csel w13, w13, w10, gt +; CHECK-CVT-NEXT: cmp w14, w8 +; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w14, w14, w10, gt +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w15, w15, w8, lt +; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w15, w15, w10, gt +; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: csel w11, w16, w8, lt +; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s1, w15 +; CHECK-CVT-NEXT: fcvtzs w15, s0 +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w8, w15, w8, lt +; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: mov v1.s[2], w11 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i16: @@ -2923,107 +2386,37 @@ define <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i19: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov w8, #-931135488 -; CHECK-NEXT: fcvt s18, h0 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: mov w8, #65472 -; CHECK-NEXT: fcvt s2, h1 -; CHECK-NEXT: mov h3, v1.h[1] -; CHECK-NEXT: movk w8, #18559, lsl #16 -; CHECK-NEXT: mov h5, v1.h[2] -; CHECK-NEXT: mov h1, v1.h[3] -; CHECK-NEXT: fmaxnm s6, s2, s4 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fmov s7, w8 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcmp s2, s2 -; CHECK-NEXT: fminnm s6, s6, s7 -; CHECK-NEXT: fmaxnm s16, s3, s4 -; CHECK-NEXT: fmaxnm s17, s5, s4 -; CHECK-NEXT: fcvtzs w8, s6 -; CHECK-NEXT: fminnm s2, s16, s7 -; CHECK-NEXT: mov h6, v0.h[1] -; CHECK-NEXT: fmaxnm s16, s1, s4 -; CHECK-NEXT: fminnm s17, s17, s7 -; CHECK-NEXT: csel w4, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: mov h3, v0.h[2] -; CHECK-NEXT: fcvtzs w8, s2 -; CHECK-NEXT: fcvt s2, h6 -; CHECK-NEXT: fminnm s6, s16, s7 -; CHECK-NEXT: fmaxnm s16, s18, s4 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fcvtzs w9, s17 -; CHECK-NEXT: csel w5, wzr, w8, vs -; CHECK-NEXT: fcmp s5, s5 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fmaxnm s5, s2, s4 -; CHECK-NEXT: fcvtzs w8, s6 -; CHECK-NEXT: fminnm s6, s16, s7 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: csel w6, wzr, w9, vs -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s1, s3, s4 -; CHECK-NEXT: fminnm s5, s5, s7 -; CHECK-NEXT: fcvtzs w9, s6 -; CHECK-NEXT: csel w7, wzr, w8, vs -; CHECK-NEXT: fcmp s18, s18 -; CHECK-NEXT: fmaxnm s4, s0, s4 -; CHECK-NEXT: fminnm s1, s1, s7 -; CHECK-NEXT: fcvtzs w8, s5 -; CHECK-NEXT: csel w0, wzr, w9, vs -; CHECK-NEXT: fcmp s2, s2 -; CHECK-NEXT: fminnm s2, s4, s7 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w1, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w8, s2 -; CHECK-NEXT: csel w2, wzr, w9, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w3, wzr, w8, vs +; CHECK-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: movi v1.4s, #3, msl #16 +; CHECK-NEXT: mvni v3.4s, #3, msl #16 +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v3.4s +; CHECK-NEXT: mov w1, v1.s[1] +; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mov w5, v0.s[1] +; CHECK-NEXT: mov w3, v1.s[3] +; CHECK-NEXT: mov w6, v0.s[2] +; CHECK-NEXT: mov w7, v0.s[3] +; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x } define <8 x i32> @test_signed_v8f16_v8i32_duplicate(<8 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v8f16_v8i32_duplicate: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_signed_v8f16_v8i32_duplicate: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h4, v0.h[1] -; CHECK-FP16-NEXT: fcvtzs w9, h0 -; CHECK-FP16-NEXT: mov h2, v3.h[1] -; CHECK-FP16-NEXT: fcvtzs w8, h3 -; CHECK-FP16-NEXT: mov h5, v3.h[2] -; CHECK-FP16-NEXT: mov h3, v3.h[3] -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzs w8, h2 -; CHECK-FP16-NEXT: fmov s2, w9 -; CHECK-FP16-NEXT: fcvtzs w9, h4 -; CHECK-FP16-NEXT: mov h4, v0.h[2] -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov v1.s[1], w8 -; CHECK-FP16-NEXT: fcvtzs w8, h5 -; CHECK-FP16-NEXT: mov v2.s[1], w9 -; CHECK-FP16-NEXT: fcvtzs w9, h4 -; CHECK-FP16-NEXT: mov v1.s[2], w8 -; CHECK-FP16-NEXT: fcvtzs w8, h3 -; CHECK-FP16-NEXT: mov v2.s[2], w9 -; CHECK-FP16-NEXT: fcvtzs w9, h0 -; CHECK-FP16-NEXT: mov v1.s[3], w8 -; CHECK-FP16-NEXT: mov v2.s[3], w9 -; CHECK-FP16-NEXT: mov v0.16b, v2.16b -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_signed_v8f16_v8i32_duplicate: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret %x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f) ret <8 x i32> %x } Index: llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -511,224 +511,98 @@ } define <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v2f16_v2i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_unsigned_v2f16_v2i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: fcvtzu w8, h0 -; CHECK-FP16-NEXT: fmov s0, w8 -; CHECK-FP16-NEXT: fcvtzu w8, h1 -; CHECK-FP16-NEXT: mov v0.s[1], w8 -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_unsigned_v2f16_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptoui.sat.v2f16.v2i32(<2 x half> %f) ret <2 x i32> %x } define <3 x i32> @test_unsigned_v3f16_v3i32(<3 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v3f16_v3i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_unsigned_v3f16_v3i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: fcvtzu w8, h0 -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzu w8, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov v1.s[1], w8 -; CHECK-FP16-NEXT: fcvtzu w8, h2 -; CHECK-FP16-NEXT: mov v1.s[2], w8 -; CHECK-FP16-NEXT: fcvtzu w8, h0 -; CHECK-FP16-NEXT: mov v1.s[3], w8 -; CHECK-FP16-NEXT: mov v0.16b, v1.16b -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_unsigned_v3f16_v3i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptoui.sat.v3f16.v3i32(<3 x half> %f) ret <3 x i32> %x } define <4 x i32> @test_unsigned_v4f16_v4i32(<4 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: fcvtzu w8, h0 -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzu w8, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov v1.s[1], w8 -; CHECK-FP16-NEXT: fcvtzu w8, h2 -; CHECK-FP16-NEXT: mov v1.s[2], w8 -; CHECK-FP16-NEXT: fcvtzu w8, h0 -; CHECK-FP16-NEXT: mov v1.s[3], w8 -; CHECK-FP16-NEXT: mov v0.16b, v1.16b -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_unsigned_v4f16_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptoui.sat.v4f16.v4i32(<4 x half> %f) ret <4 x i32> %x } define <5 x i32> @test_unsigned_v5f16_v5i32(<5 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v5f16_v5i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: mov w1, v1.s[1] -; CHECK-CVT-NEXT: mov w2, v1.s[2] -; CHECK-CVT-NEXT: mov w3, v1.s[3] -; CHECK-CVT-NEXT: fmov w0, s1 -; CHECK-CVT-NEXT: fmov w4, s0 -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_unsigned_v5f16_v5i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h4, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu w0, h0 -; CHECK-FP16-NEXT: fcvtzu w1, h1 -; CHECK-FP16-NEXT: fcvtzu w2, h2 -; CHECK-FP16-NEXT: fcvtzu w4, h3 -; CHECK-FP16-NEXT: fcvtzu w3, h4 -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_unsigned_v5f16_v5i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: mov w1, v1.s[1] +; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mov w3, v1.s[3] +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptoui.sat.v5f16.v5i32(<5 x half> %f) ret <5 x i32> %x } define <6 x i32> @test_unsigned_v6f16_v6i32(<6 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v6f16_v6i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: mov w1, v1.s[1] -; CHECK-CVT-NEXT: mov w2, v1.s[2] -; CHECK-CVT-NEXT: mov w3, v1.s[3] -; CHECK-CVT-NEXT: mov w5, v0.s[1] -; CHECK-CVT-NEXT: fmov w0, s1 -; CHECK-CVT-NEXT: fmov w4, s0 -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_unsigned_v6f16_v6i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h4, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu w0, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] -; CHECK-FP16-NEXT: fcvtzu w8, h1 -; CHECK-FP16-NEXT: fcvtzu w2, h3 -; CHECK-FP16-NEXT: fcvtzu w3, h4 -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzu w5, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: mov v1.s[1], w5 -; CHECK-FP16-NEXT: fcvtzu w1, h2 -; CHECK-FP16-NEXT: fmov w4, s1 -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_unsigned_v6f16_v6i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: mov w1, v1.s[1] +; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mov w3, v1.s[3] +; CHECK-NEXT: mov w5, v0.s[1] +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptoui.sat.v6f16.v6i32(<6 x half> %f) ret <6 x i32> %x } define <7 x i32> @test_unsigned_v7f16_v7i32(<7 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v7f16_v7i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: mov w1, v1.s[1] -; CHECK-CVT-NEXT: mov w2, v1.s[2] -; CHECK-CVT-NEXT: mov w3, v1.s[3] -; CHECK-CVT-NEXT: mov w5, v0.s[1] -; CHECK-CVT-NEXT: mov w6, v0.s[2] -; CHECK-CVT-NEXT: fmov w0, s1 -; CHECK-CVT-NEXT: fmov w4, s0 -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_unsigned_v7f16_v7i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h4, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu w0, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] -; CHECK-FP16-NEXT: fcvtzu w8, h1 -; CHECK-FP16-NEXT: mov h1, v1.h[2] -; CHECK-FP16-NEXT: fcvtzu w3, h4 -; CHECK-FP16-NEXT: fmov s3, w8 -; CHECK-FP16-NEXT: fcvtzu w8, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: fcvtzu w6, h1 -; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: mov v3.s[1], w8 -; CHECK-FP16-NEXT: fcvtzu w2, h2 -; CHECK-FP16-NEXT: fcvtzu w1, h1 -; CHECK-FP16-NEXT: mov v3.s[2], w6 -; CHECK-FP16-NEXT: mov w5, v3.s[1] -; CHECK-FP16-NEXT: fmov w4, s3 -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_unsigned_v7f16_v7i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: mov w1, v1.s[1] +; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mov w3, v1.s[3] +; CHECK-NEXT: mov w5, v0.s[1] +; CHECK-NEXT: mov w6, v0.s[2] +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptoui.sat.v7f16.v7i32(<7 x half> %f) ret <7 x i32> %x } define <8 x i32> @test_unsigned_v8f16_v8i32(<8 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i32: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h4, v0.h[1] -; CHECK-FP16-NEXT: fcvtzu w9, h0 -; CHECK-FP16-NEXT: mov h2, v3.h[1] -; CHECK-FP16-NEXT: fcvtzu w8, h3 -; CHECK-FP16-NEXT: mov h5, v3.h[2] -; CHECK-FP16-NEXT: mov h3, v3.h[3] -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzu w8, h2 -; CHECK-FP16-NEXT: fmov s2, w9 -; CHECK-FP16-NEXT: fcvtzu w9, h4 -; CHECK-FP16-NEXT: mov h4, v0.h[2] -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov v1.s[1], w8 -; CHECK-FP16-NEXT: fcvtzu w8, h5 -; CHECK-FP16-NEXT: mov v2.s[1], w9 -; CHECK-FP16-NEXT: fcvtzu w9, h4 -; CHECK-FP16-NEXT: mov v1.s[2], w8 -; CHECK-FP16-NEXT: fcvtzu w8, h3 -; CHECK-FP16-NEXT: mov v2.s[2], w9 -; CHECK-FP16-NEXT: fcvtzu w9, h0 -; CHECK-FP16-NEXT: mov v1.s[3], w8 -; CHECK-FP16-NEXT: mov v2.s[3], w9 -; CHECK-FP16-NEXT: mov v0.16b, v2.16b -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_unsigned_v8f16_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret %x = call <8 x i32> @llvm.fptoui.sat.v8f16.v8i32(<8 x half> %f) ret <8 x i32> %x } @@ -750,19 +624,9 @@ define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmov s3, #1.00000000 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmaxnm s1, s2, s1 -; CHECK-NEXT: fminnm s0, s0, s3 -; CHECK-NEXT: fminnm s1, s1, s3 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2s, #1 +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i1> @llvm.fptoui.sat.v2f32.v2i1(<2 x float> %f) ret <2 x i1> %x @@ -771,20 +635,9 @@ define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #1132396544 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmaxnm s1, s2, s1 -; CHECK-NEXT: fminnm s0, s0, s3 -; CHECK-NEXT: fminnm s1, s1, s3 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi d1, #0x0000ff000000ff +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptoui.sat.v2f32.v2i8(<2 x float> %f) ret <2 x i8> %x @@ -793,21 +646,9 @@ define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: movk w8, #17919, lsl #16 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: fmaxnm s1, s2, s1 -; CHECK-NEXT: fminnm s0, s0, s3 -; CHECK-NEXT: fminnm s1, s1, s3 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2s, #31, msl #8 +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i13> @llvm.fptoui.sat.v2f32.v2i13(<2 x float> %f) ret <2 x i13> %x @@ -816,21 +657,9 @@ define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #65280 -; CHECK-NEXT: movk w8, #18303, lsl #16 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: fmaxnm s1, s2, s1 -; CHECK-NEXT: fminnm s0, s0, s3 -; CHECK-NEXT: fminnm s1, s1, s3 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi d1, #0x00ffff0000ffff +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i16> @llvm.fptoui.sat.v2f32.v2i16(<2 x float> %f) ret <2 x i16> %x @@ -839,21 +668,9 @@ define <2 x i19> @test_unsigned_v2f32_v2i19(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: movk w8, #18687, lsl #16 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: fmaxnm s1, s2, s1 -; CHECK-NEXT: fminnm s0, s0, s3 -; CHECK-NEXT: fminnm s1, s1, s3 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2s, #7, msl #16 +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i19> @llvm.fptoui.sat.v2f32.v2i19(<2 x float> %f) ret <2 x i19> %x @@ -1026,28 +843,10 @@ define <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmov s4, #1.00000000 -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: fmaxnm s3, s0, s1 -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %x = call <4 x i1> @llvm.fptoui.sat.v4f32.v4i1(<4 x float> %f) ret <4 x i1> %x @@ -1056,29 +855,10 @@ define <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #1132396544 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s3, s0, s1 -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %x = call <4 x i8> @llvm.fptoui.sat.v4f32.v4i8(<4 x float> %f) ret <4 x i8> %x @@ -1087,30 +867,10 @@ define <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: movk w8, #17919, lsl #16 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: fmaxnm s3, s0, s1 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.4s, #31, msl #8 +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %x = call <4 x i13> @llvm.fptoui.sat.v4f32.v4i13(<4 x float> %f) ret <4 x i13> %x @@ -1119,30 +879,8 @@ define <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #65280 -; CHECK-NEXT: movk w8, #18303, lsl #16 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: fmaxnm s3, s0, s1 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: uqxtn v0.4h, v0.4s ; CHECK-NEXT: ret %x = call <4 x i16> @llvm.fptoui.sat.v4f32.v4i16(<4 x float> %f) ret <4 x i16> %x @@ -1151,29 +889,9 @@ define <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: movk w8, #18687, lsl #16 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: fmaxnm s3, s0, s1 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: movi v1.4s, #7, msl #16 +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %x = call <4 x i19> @llvm.fptoui.sat.v4f32.v4i19(<4 x float> %f) ret <4 x i19> %x @@ -1688,109 +1406,61 @@ declare <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half>) define <4 x i1> @test_unsigned_v4f16_v4i1(<4 x half> %f) { -; CHECK-LABEL: test_unsigned_v4f16_v4i1: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s2, h0 -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov h5, v0.h[2] -; CHECK-NEXT: fmov s4, #1.00000000 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fmaxnm s3, s3, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: fminnm s2, s3, s4 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i1: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: movi v1.4s, #1 +; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i1: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: movi v1.4h, #1 +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: ret %x = call <4 x i1> @llvm.fptoui.sat.v4f16.v4i1(<4 x half> %f) ret <4 x i1> %x } define <4 x i8> @test_unsigned_v4f16_v4i8(<4 x half> %f) { -; CHECK-LABEL: test_unsigned_v4f16_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s2, h0 -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov w8, #1132396544 -; CHECK-NEXT: mov h5, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fmaxnm s3, s3, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: fminnm s2, s3, s4 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i8: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: movi v1.2d, #0x0000ff000000ff +; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i8: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: ret %x = call <4 x i8> @llvm.fptoui.sat.v4f16.v4i8(<4 x half> %f) ret <4 x i8> %x } define <4 x i13> @test_unsigned_v4f16_v4i13(<4 x half> %f) { -; CHECK-LABEL: test_unsigned_v4f16_v4i13: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s2, h0 -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: movk w8, #17919, lsl #16 -; CHECK-NEXT: mov h5, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fmaxnm s3, s3, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: fminnm s2, s3, s4 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i13: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: movi v1.4s, #31, msl #8 +; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i13: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: mvni v1.4h, #224, lsl #8 +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: ret %x = call <4 x i13> @llvm.fptoui.sat.v4f16.v4i13(<4 x half> %f) ret <4 x i13> %x } @@ -1798,31 +1468,9 @@ define <4 x i16> @test_unsigned_v4f16_v4i16(<4 x half> %f) { ; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i16: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: movi d1, #0000000000000000 ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #65280 -; CHECK-CVT-NEXT: movk w8, #18303, lsl #16 -; CHECK-CVT-NEXT: fmaxnm s2, s0, s1 -; CHECK-CVT-NEXT: mov s3, v0.s[1] -; CHECK-CVT-NEXT: fmov s4, w8 -; CHECK-CVT-NEXT: mov s5, v0.s[2] -; CHECK-CVT-NEXT: fminnm s2, s2, s4 -; CHECK-CVT-NEXT: fmaxnm s3, s3, s1 -; CHECK-CVT-NEXT: fmaxnm s5, s5, s1 -; CHECK-CVT-NEXT: fcvtzu w8, s2 -; CHECK-CVT-NEXT: fminnm s2, s3, s4 -; CHECK-CVT-NEXT: mov s3, v0.s[3] -; CHECK-CVT-NEXT: fmov s0, w8 -; CHECK-CVT-NEXT: fcvtzu w9, s2 -; CHECK-CVT-NEXT: fminnm s2, s5, s4 -; CHECK-CVT-NEXT: fmaxnm s1, s3, s1 -; CHECK-CVT-NEXT: mov v0.h[1], w9 -; CHECK-CVT-NEXT: fcvtzu w8, s2 -; CHECK-CVT-NEXT: fminnm s1, s1, s4 -; CHECK-CVT-NEXT: mov v0.h[2], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s1 -; CHECK-CVT-NEXT: mov v0.h[3], w8 -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-NEXT: uqxtn v0.4h, v0.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i16: @@ -1836,62 +1484,21 @@ define <4 x i19> @test_unsigned_v4f16_v4i19(<4 x half> %f) { ; CHECK-LABEL: test_unsigned_v4f16_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s2, h0 -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: movk w8, #18687, lsl #16 -; CHECK-NEXT: mov h5, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fmaxnm s3, s3, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: fminnm s2, s3, s4 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: movi v1.4s, #7, msl #16 +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %x = call <4 x i19> @llvm.fptoui.sat.v4f16.v4i19(<4 x half> %f) ret <4 x i19> %x } define <4 x i32> @test_unsigned_v4f16_v4i32_duplicate(<4 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i32_duplicate: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i32_duplicate: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: fcvtzu w8, h0 -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzu w8, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov v1.s[1], w8 -; CHECK-FP16-NEXT: fcvtzu w8, h2 -; CHECK-FP16-NEXT: mov v1.s[2], w8 -; CHECK-FP16-NEXT: fcvtzu w8, h0 -; CHECK-FP16-NEXT: mov v1.s[3], w8 -; CHECK-FP16-NEXT: mov v0.16b, v1.16b -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_unsigned_v4f16_v4i32_duplicate: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptoui.sat.v4f16.v4i32(<4 x half> %f) ret <4 x i32> %x } @@ -2162,59 +1769,60 @@ declare <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half>) define <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) { -; CHECK-LABEL: test_unsigned_v8f16_v8i1: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: fmov s5, #1.00000000 -; CHECK-NEXT: mov h6, v0.h[3] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fmaxnm s3, s3, s2 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmaxnm s1, s1, s2 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fmaxnm s4, s4, s2 -; CHECK-NEXT: fmaxnm s6, s6, s2 -; CHECK-NEXT: fminnm s7, s1, s5 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov h3, v0.h[4] -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fminnm s6, s6, s5 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fcvtzu w8, s7 -; CHECK-NEXT: mov h7, v0.h[5] -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: mov v1.b[1], w8 -; CHECK-NEXT: fcvtzu w8, s4 -; CHECK-NEXT: fcvt s4, h7 -; CHECK-NEXT: fmaxnm s3, s3, s2 -; CHECK-NEXT: mov h7, v0.h[6] -; CHECK-NEXT: mov h0, v0.h[7] -; CHECK-NEXT: mov v1.b[2], w8 -; CHECK-NEXT: fcvtzu w8, s6 -; CHECK-NEXT: fmaxnm s4, s4, s2 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fcvt s6, h7 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov v1.b[3], w8 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: fminnm s3, s4, s5 -; CHECK-NEXT: fmaxnm s4, s6, s2 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: mov v1.b[4], w8 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: fminnm s2, s4, s5 -; CHECK-NEXT: fminnm s0, s0, s5 -; CHECK-NEXT: mov v1.b[5], w8 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v1.b[6], w8 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: mov v1.b[7], w8 -; CHECK-NEXT: fmov d0, d1 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i1: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: mov s3, v1.s[2] +; CHECK-CVT-NEXT: mov s4, v1.s[3] +; CHECK-CVT-NEXT: mov s5, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fcvtzu w8, s2 +; CHECK-CVT-NEXT: fcvtzu w11, s3 +; CHECK-CVT-NEXT: fcvtzu w12, s4 +; CHECK-CVT-NEXT: fcvtzu w13, s5 +; CHECK-CVT-NEXT: cmp w8, #1 +; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo +; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo +; CHECK-CVT-NEXT: cmp w11, #1 +; CHECK-CVT-NEXT: csinc w11, w11, wzr, lo +; CHECK-CVT-NEXT: cmp w12, #1 +; CHECK-CVT-NEXT: csinc w12, w12, wzr, lo +; CHECK-CVT-NEXT: cmp w13, #1 +; CHECK-CVT-NEXT: csinc w13, w13, wzr, lo +; CHECK-CVT-NEXT: cmp w10, #1 +; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo +; CHECK-CVT-NEXT: fmov s2, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fmov s1, w9 +; CHECK-CVT-NEXT: mov v2.s[1], w13 +; CHECK-CVT-NEXT: cmp w10, #1 +; CHECK-CVT-NEXT: csinc w9, w10, wzr, lo +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: mov v1.s[1], w8 +; CHECK-CVT-NEXT: mov v2.s[2], w9 +; CHECK-CVT-NEXT: cmp w10, #1 +; CHECK-CVT-NEXT: csinc w8, w10, wzr, lo +; CHECK-CVT-NEXT: mov v1.s[2], w11 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: mov v1.s[3], w12 +; CHECK-CVT-NEXT: xtn v0.4h, v2.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i1: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: movi v1.8h, #1 +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: xtn v0.8b, v0.8h +; CHECK-FP16-NEXT: ret %x = call <8 x i1> @llvm.fptoui.sat.v8f16.v8i1(<8 x half> %f) ret <8 x i1> %x } @@ -2222,166 +1830,115 @@ define <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) { ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i8: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: movi d2, #0000000000000000 -; CHECK-CVT-NEXT: mov w8, #1132396544 -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: mov s3, v1.s[1] -; CHECK-CVT-NEXT: fmaxnm s4, s1, s2 -; CHECK-CVT-NEXT: fmov s5, w8 -; CHECK-CVT-NEXT: mov s6, v1.s[2] -; CHECK-CVT-NEXT: mov s7, v1.s[3] -; CHECK-CVT-NEXT: fmaxnm s3, s3, s2 -; CHECK-CVT-NEXT: fminnm s4, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s6, s6, s2 -; CHECK-CVT-NEXT: fminnm s3, s3, s5 -; CHECK-CVT-NEXT: fcvtzu w8, s4 -; CHECK-CVT-NEXT: fminnm s4, s6, s5 -; CHECK-CVT-NEXT: mov s6, v0.s[1] -; CHECK-CVT-NEXT: fmov s1, w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fmaxnm s3, s7, s2 -; CHECK-CVT-NEXT: mov v1.b[1], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s4 -; CHECK-CVT-NEXT: fminnm s3, s3, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s0, s2 -; CHECK-CVT-NEXT: mov v1.b[2], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s3, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s6, s2 -; CHECK-CVT-NEXT: mov s6, v0.s[2] +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w8, #255 +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: mov s3, v1.s[2] +; CHECK-CVT-NEXT: mov s4, v1.s[3] +; CHECK-CVT-NEXT: mov s5, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w11, s0 +; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: mov v1.b[3], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s3, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s6, s2 -; CHECK-CVT-NEXT: fmaxnm s0, s0, s2 -; CHECK-CVT-NEXT: mov v1.b[4], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s2, s4, s5 -; CHECK-CVT-NEXT: fminnm s0, s0, s5 -; CHECK-CVT-NEXT: mov v1.b[5], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s2 -; CHECK-CVT-NEXT: mov v1.b[6], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s0 -; CHECK-CVT-NEXT: mov v1.b[7], w8 -; CHECK-CVT-NEXT: fmov d0, d1 +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w12, s3 +; CHECK-CVT-NEXT: fcvtzu w13, s4 +; CHECK-CVT-NEXT: fcvtzu w14, s5 +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w12, #255 +; CHECK-CVT-NEXT: csel w12, w12, w8, lo +; CHECK-CVT-NEXT: cmp w13, #255 +; CHECK-CVT-NEXT: csel w13, w13, w8, lo +; CHECK-CVT-NEXT: cmp w14, #255 +; CHECK-CVT-NEXT: csel w14, w14, w8, lo +; CHECK-CVT-NEXT: cmp w11, #255 +; CHECK-CVT-NEXT: csel w11, w11, w8, lo +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fcvtzu w11, s1 +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: mov v2.s[1], w14 +; CHECK-CVT-NEXT: cmp w11, #255 +; CHECK-CVT-NEXT: csel w10, w11, w8, lo +; CHECK-CVT-NEXT: fcvtzu w11, s0 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: cmp w11, #255 +; CHECK-CVT-NEXT: csel w8, w11, w8, lo +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v2.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi d2, #0000000000000000 -; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: fcvt s3, h0 -; CHECK-FP16-NEXT: mov w8, #1132396544 -; CHECK-FP16-NEXT: mov h4, v0.h[2] -; CHECK-FP16-NEXT: mov h6, v0.h[3] -; CHECK-FP16-NEXT: fcvt s1, h1 -; CHECK-FP16-NEXT: fmov s5, w8 -; CHECK-FP16-NEXT: fmaxnm s3, s3, s2 -; CHECK-FP16-NEXT: fcvt s4, h4 -; CHECK-FP16-NEXT: fcvt s6, h6 -; CHECK-FP16-NEXT: fmaxnm s1, s1, s2 -; CHECK-FP16-NEXT: fminnm s3, s3, s5 -; CHECK-FP16-NEXT: fmaxnm s4, s4, s2 -; CHECK-FP16-NEXT: fmaxnm s6, s6, s2 -; CHECK-FP16-NEXT: fminnm s7, s1, s5 -; CHECK-FP16-NEXT: fcvtzu w8, s3 -; CHECK-FP16-NEXT: mov h3, v0.h[4] -; CHECK-FP16-NEXT: fminnm s4, s4, s5 -; CHECK-FP16-NEXT: fminnm s6, s6, s5 -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzu w8, s7 -; CHECK-FP16-NEXT: mov h7, v0.h[5] -; CHECK-FP16-NEXT: fcvt s3, h3 -; CHECK-FP16-NEXT: mov v1.b[1], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s4 -; CHECK-FP16-NEXT: fcvt s4, h7 -; CHECK-FP16-NEXT: fmaxnm s3, s3, s2 -; CHECK-FP16-NEXT: mov h7, v0.h[6] -; CHECK-FP16-NEXT: mov h0, v0.h[7] -; CHECK-FP16-NEXT: mov v1.b[2], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s6 -; CHECK-FP16-NEXT: fmaxnm s4, s4, s2 -; CHECK-FP16-NEXT: fminnm s3, s3, s5 -; CHECK-FP16-NEXT: fcvt s6, h7 -; CHECK-FP16-NEXT: fcvt s0, h0 -; CHECK-FP16-NEXT: mov v1.b[3], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s3 -; CHECK-FP16-NEXT: fminnm s3, s4, s5 -; CHECK-FP16-NEXT: fmaxnm s4, s6, s2 -; CHECK-FP16-NEXT: fmaxnm s0, s0, s2 -; CHECK-FP16-NEXT: mov v1.b[4], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s3 -; CHECK-FP16-NEXT: fminnm s2, s4, s5 -; CHECK-FP16-NEXT: fminnm s0, s0, s5 -; CHECK-FP16-NEXT: mov v1.b[5], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s2 -; CHECK-FP16-NEXT: mov v1.b[6], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s0 -; CHECK-FP16-NEXT: mov v1.b[7], w8 -; CHECK-FP16-NEXT: fmov d0, d1 +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: uqxtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret %x = call <8 x i8> @llvm.fptoui.sat.v8f16.v8i8(<8 x half> %f) ret <8 x i8> %x } define <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) { -; CHECK-LABEL: test_unsigned_v8f16_v8i13: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: movk w8, #17919, lsl #16 -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: mov h6, v0.h[3] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fmaxnm s3, s3, s2 -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmaxnm s1, s1, s2 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fmaxnm s4, s4, s2 -; CHECK-NEXT: fmaxnm s6, s6, s2 -; CHECK-NEXT: fminnm s7, s1, s5 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov h3, v0.h[4] -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fminnm s6, s6, s5 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fcvtzu w8, s7 -; CHECK-NEXT: mov h7, v0.h[5] -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: mov v1.h[1], w8 -; CHECK-NEXT: fcvtzu w8, s4 -; CHECK-NEXT: fcvt s4, h7 -; CHECK-NEXT: fmaxnm s3, s3, s2 -; CHECK-NEXT: mov h7, v0.h[6] -; CHECK-NEXT: mov h0, v0.h[7] -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s6 -; CHECK-NEXT: fmaxnm s4, s4, s2 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fcvt s6, h7 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: fminnm s3, s4, s5 -; CHECK-NEXT: fmaxnm s4, s6, s2 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: mov v1.h[4], w8 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: fminnm s2, s4, s5 -; CHECK-NEXT: fminnm s0, s0, s5 -; CHECK-NEXT: mov v1.h[5], w8 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v1.h[6], w8 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: mov v1.h[7], w8 -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i13: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w8, #8191 +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: mov s3, v1.s[2] +; CHECK-CVT-NEXT: mov s4, v1.s[3] +; CHECK-CVT-NEXT: mov s5, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w11, s0 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w12, s3 +; CHECK-CVT-NEXT: fcvtzu w13, s4 +; CHECK-CVT-NEXT: fcvtzu w14, s5 +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w12, w12, w8, lo +; CHECK-CVT-NEXT: cmp w13, w8 +; CHECK-CVT-NEXT: csel w13, w13, w8, lo +; CHECK-CVT-NEXT: cmp w14, w8 +; CHECK-CVT-NEXT: csel w14, w14, w8, lo +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w11, w11, w8, lo +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fcvtzu w11, s1 +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: mov v2.s[1], w14 +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w10, w11, w8, lo +; CHECK-CVT-NEXT: fcvtzu w11, s0 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w8, w11, w8, lo +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v2.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: mvni v1.8h, #224, lsl #8 +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: ret %x = call <8 x i13> @llvm.fptoui.sat.v8f16.v8i13(<8 x half> %f) ret <8 x i13> %x } @@ -2389,51 +1946,49 @@ define <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) { ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i16: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: movi d2, #0000000000000000 -; CHECK-CVT-NEXT: mov w8, #65280 -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: movk w8, #18303, lsl #16 -; CHECK-CVT-NEXT: mov s3, v1.s[1] -; CHECK-CVT-NEXT: fmaxnm s4, s1, s2 -; CHECK-CVT-NEXT: fmov s5, w8 -; CHECK-CVT-NEXT: mov s6, v1.s[2] -; CHECK-CVT-NEXT: mov s7, v1.s[3] -; CHECK-CVT-NEXT: fmaxnm s3, s3, s2 -; CHECK-CVT-NEXT: fminnm s4, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s6, s6, s2 -; CHECK-CVT-NEXT: fminnm s3, s3, s5 -; CHECK-CVT-NEXT: fcvtzu w8, s4 -; CHECK-CVT-NEXT: fminnm s4, s6, s5 -; CHECK-CVT-NEXT: mov s6, v0.s[1] -; CHECK-CVT-NEXT: fmov s1, w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fmaxnm s3, s7, s2 -; CHECK-CVT-NEXT: mov v1.h[1], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s4 -; CHECK-CVT-NEXT: fminnm s3, s3, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s0, s2 -; CHECK-CVT-NEXT: mov v1.h[2], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s3, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s6, s2 -; CHECK-CVT-NEXT: mov s6, v0.s[2] +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w8, #65535 +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: mov s3, v1.s[2] +; CHECK-CVT-NEXT: mov s4, v1.s[3] +; CHECK-CVT-NEXT: mov s5, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w11, s0 +; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: mov v1.h[3], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s3, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s6, s2 -; CHECK-CVT-NEXT: fmaxnm s0, s0, s2 -; CHECK-CVT-NEXT: mov v1.h[4], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s2, s4, s5 -; CHECK-CVT-NEXT: fminnm s0, s0, s5 -; CHECK-CVT-NEXT: mov v1.h[5], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s2 -; CHECK-CVT-NEXT: mov v1.h[6], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s0 -; CHECK-CVT-NEXT: mov v1.h[7], w8 -; CHECK-CVT-NEXT: mov v0.16b, v1.16b +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w12, s3 +; CHECK-CVT-NEXT: fcvtzu w13, s4 +; CHECK-CVT-NEXT: fcvtzu w14, s5 +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w12, w12, w8, lo +; CHECK-CVT-NEXT: cmp w13, w8 +; CHECK-CVT-NEXT: csel w13, w13, w8, lo +; CHECK-CVT-NEXT: cmp w14, w8 +; CHECK-CVT-NEXT: csel w14, w14, w8, lo +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w11, w11, w8, lo +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fcvtzu w11, s1 +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: mov v2.s[1], w14 +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w10, w11, w8, lo +; CHECK-CVT-NEXT: fcvtzu w11, s0 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w8, w11, w8, lo +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v2.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16: @@ -2447,90 +2002,34 @@ define <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_unsigned_v8f16_v8i19: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: movk w8, #18687, lsl #16 -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: mov h5, v0.h[3] -; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov h6, v1.h[1] -; CHECK-NEXT: mov h7, v1.h[2] -; CHECK-NEXT: mov h16, v1.h[3] -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: fmaxnm s3, s3, s2 -; CHECK-NEXT: fmaxnm s4, s4, s2 -; CHECK-NEXT: fmaxnm s5, s5, s2 -; CHECK-NEXT: fmaxnm s1, s1, s2 -; CHECK-NEXT: fmov s17, w8 -; CHECK-NEXT: fmaxnm s6, s6, s2 -; CHECK-NEXT: fmaxnm s7, s7, s2 -; CHECK-NEXT: fmaxnm s2, s16, s2 -; CHECK-NEXT: fminnm s0, s0, s17 -; CHECK-NEXT: fminnm s3, s3, s17 -; CHECK-NEXT: fminnm s4, s4, s17 -; CHECK-NEXT: fminnm s5, s5, s17 -; CHECK-NEXT: fminnm s1, s1, s17 -; CHECK-NEXT: fminnm s6, s6, s17 -; CHECK-NEXT: fminnm s7, s7, s17 -; CHECK-NEXT: fminnm s2, s2, s17 -; CHECK-NEXT: fcvtzu w0, s0 -; CHECK-NEXT: fcvtzu w1, s3 -; CHECK-NEXT: fcvtzu w2, s4 -; CHECK-NEXT: fcvtzu w3, s5 -; CHECK-NEXT: fcvtzu w4, s1 -; CHECK-NEXT: fcvtzu w5, s6 -; CHECK-NEXT: fcvtzu w6, s7 -; CHECK-NEXT: fcvtzu w7, s2 +; CHECK-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: movi v1.4s, #7, msl #16 +; CHECK-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mov w1, v2.s[1] +; CHECK-NEXT: mov w2, v2.s[2] +; CHECK-NEXT: mov w5, v0.s[1] +; CHECK-NEXT: mov w3, v2.s[3] +; CHECK-NEXT: mov w6, v0.s[2] +; CHECK-NEXT: mov w7, v0.s[3] +; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: fmov w0, s2 ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptoui.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x } define <8 x i32> @test_unsigned_v8f16_v8i32_duplicate(<8 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i32_duplicate: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i32_duplicate: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h4, v0.h[1] -; CHECK-FP16-NEXT: fcvtzu w9, h0 -; CHECK-FP16-NEXT: mov h2, v3.h[1] -; CHECK-FP16-NEXT: fcvtzu w8, h3 -; CHECK-FP16-NEXT: mov h5, v3.h[2] -; CHECK-FP16-NEXT: mov h3, v3.h[3] -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzu w8, h2 -; CHECK-FP16-NEXT: fmov s2, w9 -; CHECK-FP16-NEXT: fcvtzu w9, h4 -; CHECK-FP16-NEXT: mov h4, v0.h[2] -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov v1.s[1], w8 -; CHECK-FP16-NEXT: fcvtzu w8, h5 -; CHECK-FP16-NEXT: mov v2.s[1], w9 -; CHECK-FP16-NEXT: fcvtzu w9, h4 -; CHECK-FP16-NEXT: mov v1.s[2], w8 -; CHECK-FP16-NEXT: fcvtzu w8, h3 -; CHECK-FP16-NEXT: mov v2.s[2], w9 -; CHECK-FP16-NEXT: fcvtzu w9, h0 -; CHECK-FP16-NEXT: mov v1.s[3], w8 -; CHECK-FP16-NEXT: mov v2.s[3], w9 -; CHECK-FP16-NEXT: mov v0.16b, v2.16b -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: test_unsigned_v8f16_v8i32_duplicate: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret %x = call <8 x i32> @llvm.fptoui.sat.v8f16.v8i32(<8 x half> %f) ret <8 x i32> %x }