diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3464,24 +3464,44 @@ uint64_t DstWidth = DstVT.getScalarSizeInBits(); assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width"); - // TODO: Saturate to SatWidth explicitly. - if (SatWidth != DstWidth) + // In the absence of FP16 support, promote f16 to f32 and saturate the result. + if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) { + SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal); + SrcVT = MVT::f32; + } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16) return SDValue(); - // In the absence of FP16 support, promote f16 to f32, like LowerFP_TO_INT(). - if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal), - Op.getOperand(1)); - + SDLoc DL(Op); // Cases that we can emit directly. if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 || (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) && - (DstVT == MVT::i64 || DstVT == MVT::i32)) - return Op; + DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32)) + return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, + DAG.getValueType(DstVT)); + + // Otherwise we emit a cvt that saturates to a higher BW, and saturate the + // result. This is only valid if the legal cvt is larger than the saturate + // width. + if (DstWidth < SatWidth) + return SDValue(); - // For all other cases, fall back on the expanded form. - return SDValue(); + SDValue NativeCvt = + DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT)); + SDValue Sat; + if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) { + SDValue MinC = DAG.getConstant( + APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT); + SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC); + SDValue MaxC = DAG.getConstant( + APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT); + Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC); + } else { + SDValue MinC = DAG.getConstant( + APInt::getAllOnesValue(SatWidth).zextOrSelf(DstWidth), DL, DstVT); + Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC); + } + + return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat); } SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -20,13 +20,11 @@ define i1 @test_signed_i1_f32(float %f) nounwind { ; CHECK-LABEL: test_signed_i1_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, #-1.00000000 -; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s1, s0, s1 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fcvtzs w8, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcvtzs w8, s0 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: csel w8, w8, wzr, lt +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: csinv w8, w8, wzr, ge ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %x = call i1 @llvm.fptosi.sat.i1.f32(float %f) @@ -36,15 +34,13 @@ define i8 @test_signed_i8_f32(float %f) nounwind { ; CHECK-LABEL: test_signed_i8_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1023410176 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, #1123942400 -; CHECK-NEXT: fmaxnm s1, s0, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fcvtzs w8, s1 -; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: fcvtzs w9, s0 +; CHECK-NEXT: mov w8, #127 +; CHECK-NEXT: cmp w9, #127 +; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mov w9, #-128 +; CHECK-NEXT: cmn w8, #128 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %x = call i8 @llvm.fptosi.sat.i8.f32(float %f) ret i8 %x @@ -53,16 +49,13 @@ define i13 @test_signed_i13_f32(float %f) nounwind { ; CHECK-LABEL: test_signed_i13_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-981467136 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, #61440 -; CHECK-NEXT: movk w8, #17791, lsl #16 -; CHECK-NEXT: fmaxnm s1, s0, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fcvtzs w8, s1 -; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: fcvtzs w9, s0 +; CHECK-NEXT: mov w8, #4095 +; CHECK-NEXT: cmp w9, #4095 +; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mov w9, #-4096 +; CHECK-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %x = call i13 @llvm.fptosi.sat.i13.f32(float %f) ret i13 %x @@ -71,16 +64,13 @@ define i16 @test_signed_i16_f32(float %f) nounwind { ; CHECK-LABEL: test_signed_i16_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-956301312 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, #65024 -; CHECK-NEXT: movk w8, #18175, lsl #16 -; CHECK-NEXT: fmaxnm s1, s0, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fcvtzs w8, s1 -; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: fcvtzs w8, s0 +; CHECK-NEXT: mov w9, #32767 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: mov w9, #-32768 +; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %x = call i16 @llvm.fptosi.sat.i16.f32(float %f) ret i16 %x @@ -89,16 +79,13 @@ define i19 @test_signed_i19_f32(float %f) nounwind { ; CHECK-LABEL: test_signed_i19_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-931135488 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, #65472 -; CHECK-NEXT: movk w8, #18559, lsl #16 -; CHECK-NEXT: fmaxnm s1, s0, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fcvtzs w8, s1 -; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: fcvtzs w8, s0 +; CHECK-NEXT: mov w9, #262143 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: mov w9, #-262144 +; CHECK-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %x = call i19 @llvm.fptosi.sat.i19.f32(float %f) ret i19 %x @@ -116,19 +103,13 @@ define i50 @test_signed_i50_f32(float %f) nounwind { ; CHECK-LABEL: test_signed_i50_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-671088640 -; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, #1476395007 -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov x8, #-562949953421312 -; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: mov x9, #562949953421311 -; CHECK-NEXT: csel x8, x9, x8, gt -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel x0, xzr, x8, vs +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x8, x8, x9, lt +; CHECK-NEXT: mov x9, #-562949953421312 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x0, x8, x9, gt ; CHECK-NEXT: ret %x = call i50 @llvm.fptosi.sat.i50.f32(float %f) ret i50 %x @@ -219,13 +200,11 @@ define i1 @test_signed_i1_f64(double %f) nounwind { ; CHECK-LABEL: test_signed_i1_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d1, #-1.00000000 -; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: fmaxnm d1, d0, d1 -; CHECK-NEXT: fminnm d1, d1, d2 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: csel w8, w8, wzr, lt +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: csinv w8, w8, wzr, ge ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %x = call i1 @llvm.fptosi.sat.i1.f64(double %f) @@ -235,16 +214,13 @@ define i8 @test_signed_i8_f64(double %f) nounwind { ; CHECK-LABEL: test_signed_i8_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4584664420663164928 -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov x8, #211106232532992 -; CHECK-NEXT: movk x8, #16479, lsl #48 -; CHECK-NEXT: fmaxnm d1, d0, d1 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fminnm d1, d1, d2 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: fcvtzs w9, d0 +; CHECK-NEXT: mov w8, #127 +; CHECK-NEXT: cmp w9, #127 +; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mov w9, #-128 +; CHECK-NEXT: cmn w8, #128 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %x = call i8 @llvm.fptosi.sat.i8.f64(double %f) ret i8 %x @@ -253,16 +229,13 @@ define i13 @test_signed_i13_f64(double %f) nounwind { ; CHECK-LABEL: test_signed_i13_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4562146422526312448 -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov x8, #279275953455104 -; CHECK-NEXT: movk x8, #16559, lsl #48 -; CHECK-NEXT: fmaxnm d1, d0, d1 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fminnm d1, d1, d2 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: fcvtzs w9, d0 +; CHECK-NEXT: mov w8, #4095 +; CHECK-NEXT: cmp w9, #4095 +; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mov w9, #-4096 +; CHECK-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %x = call i13 @llvm.fptosi.sat.i13.f64(double %f) ret i13 %x @@ -271,16 +244,13 @@ define i16 @test_signed_i16_f64(double %f) nounwind { ; CHECK-LABEL: test_signed_i16_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4548635623644200960 -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov x8, #281200098803712 -; CHECK-NEXT: movk x8, #16607, lsl #48 -; CHECK-NEXT: fmaxnm d1, d0, d1 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fminnm d1, d1, d2 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: mov w9, #32767 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: mov w9, #-32768 +; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %x = call i16 @llvm.fptosi.sat.i16.f64(double %f) ret i16 %x @@ -289,16 +259,13 @@ define i19 @test_signed_i19_f64(double %f) nounwind { ; CHECK-LABEL: test_signed_i19_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4535124824762089472 -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov x8, #281440616972288 -; CHECK-NEXT: movk x8, #16655, lsl #48 -; CHECK-NEXT: fmaxnm d1, d0, d1 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fminnm d1, d1, d2 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: mov w9, #262143 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: mov w9, #-262144 +; CHECK-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %x = call i19 @llvm.fptosi.sat.i19.f64(double %f) ret i19 %x @@ -316,16 +283,13 @@ define i50 @test_signed_i50_f64(double %f) nounwind { ; CHECK-LABEL: test_signed_i50_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4395513236313604096 -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov x8, #-16 -; CHECK-NEXT: movk x8, #17151, lsl #48 -; CHECK-NEXT: fmaxnm d1, d0, d1 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fminnm d1, d1, d2 -; CHECK-NEXT: fcvtzs x8, d1 -; CHECK-NEXT: csel x0, xzr, x8, vs +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: mov x9, #562949953421311 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x8, x8, x9, lt +; CHECK-NEXT: mov x9, #-562949953421312 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x0, x8, x9, gt ; CHECK-NEXT: ret %x = call i50 @llvm.fptosi.sat.i50.f64(double %f) ret i50 %x @@ -414,93 +378,134 @@ declare i128 @llvm.fptosi.sat.i128.f16(half) define i1 @test_signed_i1_f16(half %f) nounwind { -; CHECK-LABEL: test_signed_i1_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmov s1, #-1.00000000 -; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: fmaxnm s1, s0, s1 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fcvtzs w8, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: and w0, w8, #0x1 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_i1_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs w8, s0 +; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: csel w8, w8, wzr, lt +; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge +; CHECK-CVT-NEXT: and w0, w8, #0x1 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_i1_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w8, h0 +; CHECK-FP16-NEXT: cmp w8, #0 +; CHECK-FP16-NEXT: csel w8, w8, wzr, lt +; CHECK-FP16-NEXT: cmp w8, #0 +; CHECK-FP16-NEXT: csinv w8, w8, wzr, ge +; CHECK-FP16-NEXT: and w0, w8, #0x1 +; CHECK-FP16-NEXT: ret %x = call i1 @llvm.fptosi.sat.i1.f16(half %f) ret i1 %x } define i8 @test_signed_i8_f16(half %f) nounwind { -; CHECK-LABEL: test_signed_i8_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1023410176 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, #1123942400 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s1, s0, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fcvtzs w8, s1 -; CHECK-NEXT: csel w0, wzr, w8, vs -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_i8_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov w8, #127 +; CHECK-CVT-NEXT: fcvtzs w9, s0 +; CHECK-CVT-NEXT: cmp w9, #127 +; CHECK-CVT-NEXT: csel w8, w9, w8, lt +; CHECK-CVT-NEXT: mov w9, #-128 +; CHECK-CVT-NEXT: cmn w8, #128 +; CHECK-CVT-NEXT: csel w0, w8, w9, gt +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_i8_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w9, h0 +; CHECK-FP16-NEXT: mov w8, #127 +; CHECK-FP16-NEXT: cmp w9, #127 +; CHECK-FP16-NEXT: csel w8, w9, w8, lt +; CHECK-FP16-NEXT: mov w9, #-128 +; CHECK-FP16-NEXT: cmn w8, #128 +; CHECK-FP16-NEXT: csel w0, w8, w9, gt +; CHECK-FP16-NEXT: ret %x = call i8 @llvm.fptosi.sat.i8.f16(half %f) ret i8 %x } define i13 @test_signed_i13_f16(half %f) nounwind { -; CHECK-LABEL: test_signed_i13_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-981467136 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, #61440 -; CHECK-NEXT: movk w8, #17791, lsl #16 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s1, s0, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fcvtzs w8, s1 -; CHECK-NEXT: csel w0, wzr, w8, vs -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_i13_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov w8, #4095 +; CHECK-CVT-NEXT: fcvtzs w9, s0 +; CHECK-CVT-NEXT: cmp w9, #4095 +; CHECK-CVT-NEXT: csel w8, w9, w8, lt +; CHECK-CVT-NEXT: mov w9, #-4096 +; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w0, w8, w9, gt +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_i13_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w9, h0 +; CHECK-FP16-NEXT: mov w8, #4095 +; CHECK-FP16-NEXT: cmp w9, #4095 +; CHECK-FP16-NEXT: csel w8, w9, w8, lt +; CHECK-FP16-NEXT: mov w9, #-4096 +; CHECK-FP16-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: csel w0, w8, w9, gt +; CHECK-FP16-NEXT: ret %x = call i13 @llvm.fptosi.sat.i13.f16(half %f) ret i13 %x } define i16 @test_signed_i16_f16(half %f) nounwind { -; CHECK-LABEL: test_signed_i16_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-956301312 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, #65024 -; CHECK-NEXT: movk w8, #18175, lsl #16 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s1, s0, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fcvtzs w8, s1 -; CHECK-NEXT: csel w0, wzr, w8, vs -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_i16_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov w9, #32767 +; CHECK-CVT-NEXT: fcvtzs w8, s0 +; CHECK-CVT-NEXT: cmp w8, w9 +; CHECK-CVT-NEXT: csel w8, w8, w9, lt +; CHECK-CVT-NEXT: mov w9, #-32768 +; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w0, w8, w9, gt +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_i16_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w8, h0 +; CHECK-FP16-NEXT: mov w9, #32767 +; CHECK-FP16-NEXT: cmp w8, w9 +; CHECK-FP16-NEXT: csel w8, w8, w9, lt +; CHECK-FP16-NEXT: mov w9, #-32768 +; CHECK-FP16-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-FP16-NEXT: csel w0, w8, w9, gt +; CHECK-FP16-NEXT: ret %x = call i16 @llvm.fptosi.sat.i16.f16(half %f) ret i16 %x } define i19 @test_signed_i19_f16(half %f) nounwind { -; CHECK-LABEL: test_signed_i19_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-931135488 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, #65472 -; CHECK-NEXT: movk w8, #18559, lsl #16 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s1, s0, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fminnm s1, s1, s2 -; CHECK-NEXT: fcvtzs w8, s1 -; CHECK-NEXT: csel w0, wzr, w8, vs -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_i19_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov w9, #262143 +; CHECK-CVT-NEXT: fcvtzs w8, s0 +; CHECK-CVT-NEXT: cmp w8, w9 +; CHECK-CVT-NEXT: csel w8, w8, w9, lt +; CHECK-CVT-NEXT: mov w9, #-262144 +; CHECK-CVT-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: csel w0, w8, w9, gt +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_i19_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w8, h0 +; CHECK-FP16-NEXT: mov w9, #262143 +; CHECK-FP16-NEXT: cmp w8, w9 +; CHECK-FP16-NEXT: csel w8, w8, w9, lt +; CHECK-FP16-NEXT: mov w9, #-262144 +; CHECK-FP16-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: csel w0, w8, w9, gt +; CHECK-FP16-NEXT: ret %x = call i19 @llvm.fptosi.sat.i19.f16(half %f) ret i19 %x } @@ -521,23 +526,28 @@ } define i50 @test_signed_i50_f16(half %f) nounwind { -; CHECK-LABEL: test_signed_i50_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-671088640 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov w9, #1476395007 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov x9, #-562949953421312 -; CHECK-NEXT: csel x8, x9, x8, lt -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: mov x9, #562949953421311 -; CHECK-NEXT: csel x8, x9, x8, gt -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel x0, xzr, x8, vs -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_i50_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov x9, #562949953421311 +; CHECK-CVT-NEXT: fcvtzs x8, s0 +; CHECK-CVT-NEXT: cmp x8, x9 +; CHECK-CVT-NEXT: csel x8, x8, x9, lt +; CHECK-CVT-NEXT: mov x9, #-562949953421312 +; CHECK-CVT-NEXT: cmp x8, x9 +; CHECK-CVT-NEXT: csel x0, x8, x9, gt +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_i50_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs x8, h0 +; CHECK-FP16-NEXT: mov x9, #562949953421311 +; CHECK-FP16-NEXT: cmp x8, x9 +; CHECK-FP16-NEXT: csel x8, x8, x9, lt +; CHECK-FP16-NEXT: mov x9, #-562949953421312 +; CHECK-FP16-NEXT: cmp x8, x9 +; CHECK-FP16-NEXT: csel x0, x8, x9, gt +; CHECK-FP16-NEXT: ret %x = call i50 @llvm.fptosi.sat.i50.f16(half %f) ret i50 %x } diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -823,18 +823,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, #-1.00000000 -; CHECK-NEXT: movi d3, #0000000000000000 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fminnm s4, s4, s3 -; CHECK-NEXT: fminnm s1, s2, s3 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fcvtzs w9, s0 +; CHECK-NEXT: fcvtzs w8, s1 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: csel w8, w8, wzr, lt +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: csinv w8, w8, wzr, ge +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: csel w9, w9, wzr, lt +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: csinv w9, w9, wzr, ge ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -846,24 +844,22 @@ define <2 x i8> @test_signed_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1023410176 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #1123942400 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: fcvtzs w8, s3 +; CHECK-NEXT: mov w8, #127 +; CHECK-NEXT: fcvtzs w10, s0 +; CHECK-NEXT: mov w11, #-128 ; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: cmp w9, #127 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: cmn w9, #128 +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, #127 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: cmn w8, #128 +; CHECK-NEXT: csel w8, w8, w11, gt +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptosi.sat.v2f32.v2i8(<2 x float> %f) @@ -873,25 +869,22 @@ define <2 x i13> @test_signed_v2f32_v2i13(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-981467136 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #61440 -; CHECK-NEXT: movk w8, #17791, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: fcvtzs w8, s3 +; CHECK-NEXT: mov w8, #4095 +; CHECK-NEXT: fcvtzs w10, s0 +; CHECK-NEXT: mov w11, #-4096 ; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: cmp w9, #4095 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, #4095 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-NEXT: csel w8, w8, w11, gt +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i13> @llvm.fptosi.sat.v2f32.v2i13(<2 x float> %f) @@ -901,25 +894,22 @@ define <2 x i16> @test_signed_v2f32_v2i16(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-956301312 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #65024 -; CHECK-NEXT: movk w8, #18175, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: fcvtzs w8, s3 +; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: fcvtzs w10, s0 +; CHECK-NEXT: mov w11, #-32768 ; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w8, w8, w11, gt +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i16> @llvm.fptosi.sat.v2f32.v2i16(<2 x float> %f) @@ -929,25 +919,22 @@ define <2 x i19> @test_signed_v2f32_v2i19(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-931135488 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #65472 -; CHECK-NEXT: movk w8, #18559, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: fcvtzs w8, s3 +; CHECK-NEXT: mov w8, #262143 +; CHECK-NEXT: fcvtzs w10, s0 +; CHECK-NEXT: mov w11, #-262144 ; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-NEXT: csel w8, w8, w11, gt +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i19> @llvm.fptosi.sat.v2f32.v2i19(<2 x float> %f) @@ -966,30 +953,22 @@ define <2 x i50> @test_signed_v2f32_v2i50(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i50: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-671088640 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: mov w9, #1476395007 -; CHECK-NEXT: mov x10, #562949953421311 -; CHECK-NEXT: fcvtzs x11, s0 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fmov s3, w9 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov x9, #-562949953421312 -; CHECK-NEXT: fcmp s1, s2 -; CHECK-NEXT: csel x8, x9, x8, lt -; CHECK-NEXT: fcmp s1, s3 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: fcmp s0, s2 -; CHECK-NEXT: csel x9, x9, x11, lt -; CHECK-NEXT: fcmp s0, s3 -; CHECK-NEXT: csel x9, x10, x9, gt -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: mov x8, #562949953421311 +; CHECK-NEXT: fcvtzs x10, s0 +; CHECK-NEXT: mov x11, #-562949953421312 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: csel x9, x9, x8, lt +; CHECK-NEXT: cmp x9, x11 +; CHECK-NEXT: csel x9, x9, x11, gt +; CHECK-NEXT: cmp x10, x8 +; CHECK-NEXT: csel x8, x10, x8, lt +; CHECK-NEXT: cmp x8, x11 +; CHECK-NEXT: csel x8, x8, x11, gt +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %x = call <2 x i50> @llvm.fptosi.sat.v2f32.v2i50(<2 x float> %f) ret <2 x i50> %x @@ -1161,33 +1140,31 @@ ; CHECK-LABEL: test_signed_v4f32_v4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmov s2, #-1.00000000 -; CHECK-NEXT: movi d4, #0000000000000000 -; CHECK-NEXT: mov s6, v0.s[2] -; CHECK-NEXT: fmaxnm s5, s0, s2 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s5, s5, s4 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzs w9, s5 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s2, s3, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 +; CHECK-NEXT: fcvtzs w9, s0 +; CHECK-NEXT: fcvtzs w8, s1 +; CHECK-NEXT: mov s1, v0.s[2] +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: csel w8, w8, wzr, lt +; CHECK-NEXT: fcvtzs w10, s1 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: mov s1, v0.s[3] +; CHECK-NEXT: csinv w8, w8, wzr, ge +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: csel w9, w9, wzr, lt +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: csinv w9, w9, wzr, ge +; CHECK-NEXT: cmp w10, #0 ; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s4 +; CHECK-NEXT: csel w9, w10, wzr, lt +; CHECK-NEXT: fcvtzs w10, s1 +; CHECK-NEXT: cmp w9, #0 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w9, s1 +; CHECK-NEXT: csinv w8, w9, wzr, ge +; CHECK-NEXT: cmp w10, #0 +; CHECK-NEXT: csel w9, w10, wzr, lt +; CHECK-NEXT: cmp w9, #0 ; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs +; CHECK-NEXT: csinv w8, w9, wzr, ge ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1198,36 +1175,34 @@ define <4 x i8> @test_signed_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1023410176 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: mov s6, v0.s[2] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #1123942400 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s5, s0, s2 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s5, s5, s4 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzs w9, s5 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s2, s3, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s3, s3 +; CHECK-NEXT: mov w8, #127 +; CHECK-NEXT: fcvtzs w10, s0 +; CHECK-NEXT: mov w11, #-128 ; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs +; CHECK-NEXT: mov s1, v0.s[2] +; CHECK-NEXT: cmp w9, #127 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: fcvtzs w12, s1 +; CHECK-NEXT: cmn w9, #128 +; CHECK-NEXT: mov s1, v0.s[3] +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, #127 +; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: cmn w10, #128 +; CHECK-NEXT: csel w10, w10, w11, gt +; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: csel w10, w12, w8, lt +; CHECK-NEXT: fcvtzs w12, s1 +; CHECK-NEXT: cmn w10, #128 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: csel w9, w10, w11, gt +; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: csel w8, w12, w8, lt +; CHECK-NEXT: cmn w8, #128 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: csel w8, w8, w11, gt ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1238,37 +1213,34 @@ define <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-981467136 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: mov s6, v0.s[2] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #61440 -; CHECK-NEXT: movk w8, #17791, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s5, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s5, s5, s4 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzs w9, s5 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s2, s3, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov w8, #4095 +; CHECK-NEXT: fcvtzs w10, s0 +; CHECK-NEXT: mov w11, #-4096 ; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs +; CHECK-NEXT: mov s1, v0.s[2] +; CHECK-NEXT: cmp w9, #4095 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: fcvtzs w12, s1 +; CHECK-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-NEXT: mov s1, v0.s[3] +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, #4095 +; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-NEXT: csel w10, w10, w11, gt +; CHECK-NEXT: cmp w12, #4095 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: csel w10, w12, w8, lt +; CHECK-NEXT: fcvtzs w12, s1 +; CHECK-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: csel w9, w10, w11, gt +; CHECK-NEXT: cmp w12, #4095 +; CHECK-NEXT: csel w8, w12, w8, lt +; CHECK-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: csel w8, w8, w11, gt ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1279,37 +1251,34 @@ define <4 x i16> @test_signed_v4f32_v4i16(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-956301312 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: mov s6, v0.s[2] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #65024 -; CHECK-NEXT: movk w8, #18175, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s5, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s5, s5, s4 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzs w9, s5 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s2, s3, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: fcvtzs w10, s0 +; CHECK-NEXT: mov w11, #-32768 ; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs +; CHECK-NEXT: mov s1, v0.s[2] +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: fcvtzs w12, s1 +; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-NEXT: mov s1, v0.s[3] +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w10, w10, w11, gt +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: csel w10, w12, w8, lt +; CHECK-NEXT: fcvtzs w12, s1 +; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: csel w9, w10, w11, gt +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: csel w8, w12, w8, lt +; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: csel w8, w8, w11, gt ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1320,37 +1289,34 @@ define <4 x i19> @test_signed_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-931135488 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: mov s6, v0.s[2] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #65472 -; CHECK-NEXT: movk w8, #18559, lsl #16 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s3, s1, s2 -; CHECK-NEXT: fmaxnm s5, s0, s2 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s5, s5, s4 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzs w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzs w9, s5 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: fmaxnm s2, s3, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov w8, #262143 +; CHECK-NEXT: fcvtzs w10, s0 +; CHECK-NEXT: mov w11, #-262144 ; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s4 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs +; CHECK-NEXT: mov s1, v0.s[2] +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: fcvtzs w12, s1 +; CHECK-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-NEXT: mov s1, v0.s[3] +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: cmn w10, #64, lsl #12 // =262144 +; CHECK-NEXT: csel w10, w10, w11, gt +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: csel w10, w12, w8, lt +; CHECK-NEXT: fcvtzs w12, s1 +; CHECK-NEXT: cmn w10, #64, lsl #12 // =262144 +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: csel w9, w10, w11, gt +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: csel w8, w12, w8, lt +; CHECK-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-NEXT: mov v0.s[2], w9 +; CHECK-NEXT: csel w8, w8, w11, gt ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %x = call <4 x i19> @llvm.fptosi.sat.v4f32.v4i19(<4 x float> %f) @@ -1369,43 +1335,31 @@ define <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i50: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-671088640 -; CHECK-NEXT: mov w9, #1476395007 -; CHECK-NEXT: mov s3, v0.s[1] -; CHECK-NEXT: mov x10, #562949953421311 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: mov x9, #-562949953421312 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: csel x8, x9, x8, lt -; CHECK-NEXT: fcmp s0, s2 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: csel x0, xzr, x8, vs -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s3, s2 -; CHECK-NEXT: fcvtzs x11, s0 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: mov s3, v0.s[1] -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s0, s2 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel x2, xzr, x8, vs -; CHECK-NEXT: fcvtzs x8, s3 -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: csel x8, x9, x8, lt -; CHECK-NEXT: fcmp s3, s2 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: csel x3, xzr, x8, vs +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov x9, #562949953421311 +; CHECK-NEXT: mov x10, #-562949953421312 +; CHECK-NEXT: fcvtzs x12, s0 +; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: fcvtzs x11, s2 +; CHECK-NEXT: csel x8, x8, x9, lt +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: csel x2, x8, x10, gt +; CHECK-NEXT: cmp x11, x9 +; CHECK-NEXT: csel x8, x11, x9, lt +; CHECK-NEXT: fcvtzs x11, s1 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: csel x3, x8, x10, gt +; CHECK-NEXT: cmp x12, x9 +; CHECK-NEXT: csel x8, x12, x9, lt +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: csel x0, x8, x10, gt +; CHECK-NEXT: cmp x11, x9 +; CHECK-NEXT: csel x8, x11, x9, lt +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: csel x1, x8, x10, gt ; CHECK-NEXT: ret %x = call <4 x i50> @llvm.fptosi.sat.v4f32.v4i50(<4 x float> %f) ret <4 x i50> %x @@ -1658,18 +1612,16 @@ ; CHECK-LABEL: test_signed_v2f64_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fmov d2, #-1.00000000 -; CHECK-NEXT: movi d3, #0000000000000000 -; CHECK-NEXT: fmaxnm d4, d1, d2 -; CHECK-NEXT: fmaxnm d2, d0, d2 -; CHECK-NEXT: fcmp d1, d1 -; CHECK-NEXT: fminnm d4, d4, d3 -; CHECK-NEXT: fminnm d1, d2, d3 -; CHECK-NEXT: fcvtzs w8, d4 -; CHECK-NEXT: fcvtzs w9, d1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fcvtzs w9, d0 +; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: csel w8, w8, wzr, lt +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: csinv w8, w8, wzr, ge +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: csel w9, w9, wzr, lt +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: csinv w9, w9, wzr, ge ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -1681,24 +1633,21 @@ define <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4584664420663164928 ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: mov x8, #211106232532992 -; CHECK-NEXT: movk x8, #16479, lsl #48 -; CHECK-NEXT: fcmp d1, d1 -; CHECK-NEXT: fmaxnm d3, d1, d2 -; CHECK-NEXT: fmaxnm d2, d0, d2 -; CHECK-NEXT: fmov d4, x8 -; CHECK-NEXT: fminnm d3, d3, d4 -; CHECK-NEXT: fminnm d1, d2, d4 -; CHECK-NEXT: fcvtzs w8, d3 +; CHECK-NEXT: mov w8, #127 +; CHECK-NEXT: fcvtzs w10, d0 +; CHECK-NEXT: mov w11, #-128 ; CHECK-NEXT: fcvtzs w9, d1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: cmp w9, #127 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: cmn w9, #128 +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, #127 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: cmn w8, #128 +; CHECK-NEXT: csel w8, w8, w11, gt +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptosi.sat.v2f64.v2i8(<2 x double> %f) @@ -1708,24 +1657,21 @@ define <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4562146422526312448 ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: mov x8, #279275953455104 -; CHECK-NEXT: movk x8, #16559, lsl #48 -; CHECK-NEXT: fcmp d1, d1 -; CHECK-NEXT: fmaxnm d3, d1, d2 -; CHECK-NEXT: fmaxnm d2, d0, d2 -; CHECK-NEXT: fmov d4, x8 -; CHECK-NEXT: fminnm d3, d3, d4 -; CHECK-NEXT: fminnm d1, d2, d4 -; CHECK-NEXT: fcvtzs w8, d3 +; CHECK-NEXT: mov w8, #4095 +; CHECK-NEXT: fcvtzs w10, d0 +; CHECK-NEXT: mov w11, #-4096 ; CHECK-NEXT: fcvtzs w9, d1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: cmp w9, #4095 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, #4095 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-NEXT: csel w8, w8, w11, gt +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i13> @llvm.fptosi.sat.v2f64.v2i13(<2 x double> %f) @@ -1735,24 +1681,21 @@ define <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4548635623644200960 ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: mov x8, #281200098803712 -; CHECK-NEXT: movk x8, #16607, lsl #48 -; CHECK-NEXT: fcmp d1, d1 -; CHECK-NEXT: fmaxnm d3, d1, d2 -; CHECK-NEXT: fmaxnm d2, d0, d2 -; CHECK-NEXT: fmov d4, x8 -; CHECK-NEXT: fminnm d3, d3, d4 -; CHECK-NEXT: fminnm d1, d2, d4 -; CHECK-NEXT: fcvtzs w8, d3 +; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: fcvtzs w10, d0 +; CHECK-NEXT: mov w11, #-32768 ; CHECK-NEXT: fcvtzs w9, d1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w8, w8, w11, gt +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i16> @llvm.fptosi.sat.v2f64.v2i16(<2 x double> %f) @@ -1762,24 +1705,21 @@ define <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4535124824762089472 ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: mov x8, #281440616972288 -; CHECK-NEXT: movk x8, #16655, lsl #48 -; CHECK-NEXT: fcmp d1, d1 -; CHECK-NEXT: fmaxnm d3, d1, d2 -; CHECK-NEXT: fmaxnm d2, d0, d2 -; CHECK-NEXT: fmov d4, x8 -; CHECK-NEXT: fminnm d3, d3, d4 -; CHECK-NEXT: fminnm d1, d2, d4 -; CHECK-NEXT: fcvtzs w8, d3 +; CHECK-NEXT: mov w8, #262143 +; CHECK-NEXT: fcvtzs w10, d0 +; CHECK-NEXT: mov w11, #-262144 ; CHECK-NEXT: fcvtzs w9, d1 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-NEXT: csel w8, w8, w11, gt +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i19> @llvm.fptosi.sat.v2f64.v2i19(<2 x double> %f) @@ -1803,24 +1743,21 @@ define <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i50: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4395513236313604096 ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: mov x8, #-16 -; CHECK-NEXT: movk x8, #17151, lsl #48 -; CHECK-NEXT: fcmp d1, d1 -; CHECK-NEXT: fmaxnm d3, d1, d2 -; CHECK-NEXT: fmaxnm d2, d0, d2 -; CHECK-NEXT: fmov d4, x8 -; CHECK-NEXT: fminnm d3, d3, d4 -; CHECK-NEXT: fminnm d1, d2, d4 -; CHECK-NEXT: fcvtzs x8, d3 +; CHECK-NEXT: mov x8, #562949953421311 +; CHECK-NEXT: fcvtzs x10, d0 +; CHECK-NEXT: mov x11, #-562949953421312 ; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: fcmp d0, d0 -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: csel x9, x9, x8, lt +; CHECK-NEXT: cmp x9, x11 +; CHECK-NEXT: csel x9, x9, x11, gt +; CHECK-NEXT: cmp x10, x8 +; CHECK-NEXT: csel x8, x10, x8, lt +; CHECK-NEXT: cmp x8, x11 +; CHECK-NEXT: csel x8, x8, x11, gt +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %x = call <2 x i50> @llvm.fptosi.sat.v2f64.v2i50(<2 x double> %f) ret <2 x i50> %x @@ -1982,135 +1919,231 @@ declare <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half>) define <4 x i1> @test_signed_v4f16_v4i1(<4 x half> %f) { -; CHECK-LABEL: test_signed_v4f16_v4i1: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: fmov s2, #-1.00000000 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: movi d5, #0000000000000000 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fmaxnm s7, s3, s2 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fminnm s7, s7, s5 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fcvtzs w9, s7 -; CHECK-NEXT: fminnm s1, s1, s5 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fmaxnm s2, s4, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s5 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v4f16_v4i1: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: fcvtzs w8, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: csel w8, w8, wzr, lt +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: csel w9, w9, wzr, lt +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: fcvtzs w10, s1 +; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge +; CHECK-CVT-NEXT: fcvt s1, h0 +; CHECK-CVT-NEXT: cmp w10, #0 +; CHECK-CVT-NEXT: fmov s0, w9 +; CHECK-CVT-NEXT: csel w9, w10, wzr, lt +; CHECK-CVT-NEXT: fcvtzs w10, s1 +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: mov v0.h[1], w8 +; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge +; CHECK-CVT-NEXT: cmp w10, #0 +; CHECK-CVT-NEXT: csel w9, w10, wzr, lt +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: mov v0.h[2], w8 +; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge +; CHECK-CVT-NEXT: mov v0.h[3], w8 +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v4f16_v4i1: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzs w9, h0 +; CHECK-FP16-NEXT: fcvtzs w8, h1 +; CHECK-FP16-NEXT: mov h1, v0.h[2] +; CHECK-FP16-NEXT: cmp w8, #0 +; CHECK-FP16-NEXT: csel w8, w8, wzr, lt +; CHECK-FP16-NEXT: fcvtzs w10, h1 +; CHECK-FP16-NEXT: cmp w8, #0 +; CHECK-FP16-NEXT: mov h1, v0.h[3] +; CHECK-FP16-NEXT: csinv w8, w8, wzr, ge +; CHECK-FP16-NEXT: cmp w9, #0 +; CHECK-FP16-NEXT: csel w9, w9, wzr, lt +; CHECK-FP16-NEXT: cmp w9, #0 +; CHECK-FP16-NEXT: csinv w9, w9, wzr, ge +; CHECK-FP16-NEXT: cmp w10, #0 +; CHECK-FP16-NEXT: fmov s0, w9 +; CHECK-FP16-NEXT: csel w9, w10, wzr, lt +; CHECK-FP16-NEXT: fcvtzs w10, h1 +; CHECK-FP16-NEXT: cmp w9, #0 +; CHECK-FP16-NEXT: mov v0.h[1], w8 +; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge +; CHECK-FP16-NEXT: cmp w10, #0 +; CHECK-FP16-NEXT: csel w9, w10, wzr, lt +; CHECK-FP16-NEXT: cmp w9, #0 +; CHECK-FP16-NEXT: mov v0.h[2], w8 +; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge +; CHECK-FP16-NEXT: mov v0.h[3], w8 +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-FP16-NEXT: ret %x = call <4 x i1> @llvm.fptosi.sat.v4f16.v4i1(<4 x half> %f) ret <4 x i1> %x } define <4 x i8> @test_signed_v4f16_v4i8(<4 x half> %f) { -; CHECK-LABEL: test_signed_v4f16_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: mov w8, #-1023410176 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #1123942400 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: fmaxnm s7, s3, s2 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s7, s7, s5 -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fminnm s1, s1, s5 -; CHECK-NEXT: fcvtzs w9, s7 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fmaxnm s2, s4, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s5 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v4f16_v4i8: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov w8, #127 +; CHECK-CVT-NEXT: mov w11, #-128 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: fcvtzs w9, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: cmp w9, #127 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: cmn w9, #128 +; CHECK-CVT-NEXT: csel w9, w9, w11, gt +; CHECK-CVT-NEXT: cmp w10, #127 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: cmn w10, #128 +; CHECK-CVT-NEXT: fcvtzs w12, s1 +; CHECK-CVT-NEXT: csel w10, w10, w11, gt +; CHECK-CVT-NEXT: fcvt s1, h0 +; CHECK-CVT-NEXT: cmp w12, #127 +; CHECK-CVT-NEXT: fmov s0, w10 +; CHECK-CVT-NEXT: csel w10, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s1 +; CHECK-CVT-NEXT: cmn w10, #128 +; CHECK-CVT-NEXT: mov v0.h[1], w9 +; CHECK-CVT-NEXT: csel w9, w10, w11, gt +; CHECK-CVT-NEXT: cmp w12, #127 +; CHECK-CVT-NEXT: csel w8, w12, w8, lt +; CHECK-CVT-NEXT: cmn w8, #128 +; CHECK-CVT-NEXT: mov v0.h[2], w9 +; CHECK-CVT-NEXT: csel w8, w8, w11, gt +; CHECK-CVT-NEXT: mov v0.h[3], w8 +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v4f16_v4i8: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: mov w8, #127 +; CHECK-FP16-NEXT: fcvtzs w10, h0 +; CHECK-FP16-NEXT: mov w11, #-128 +; CHECK-FP16-NEXT: fcvtzs w9, h1 +; CHECK-FP16-NEXT: mov h1, v0.h[2] +; CHECK-FP16-NEXT: cmp w9, #127 +; CHECK-FP16-NEXT: csel w9, w9, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h1 +; CHECK-FP16-NEXT: cmn w9, #128 +; CHECK-FP16-NEXT: mov h1, v0.h[3] +; CHECK-FP16-NEXT: csel w9, w9, w11, gt +; CHECK-FP16-NEXT: cmp w10, #127 +; CHECK-FP16-NEXT: csel w10, w10, w8, lt +; CHECK-FP16-NEXT: cmn w10, #128 +; CHECK-FP16-NEXT: csel w10, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, #127 +; CHECK-FP16-NEXT: fmov s0, w10 +; CHECK-FP16-NEXT: csel w10, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h1 +; CHECK-FP16-NEXT: cmn w10, #128 +; CHECK-FP16-NEXT: mov v0.h[1], w9 +; CHECK-FP16-NEXT: csel w9, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, #127 +; CHECK-FP16-NEXT: csel w8, w12, w8, lt +; CHECK-FP16-NEXT: cmn w8, #128 +; CHECK-FP16-NEXT: mov v0.h[2], w9 +; CHECK-FP16-NEXT: csel w8, w8, w11, gt +; CHECK-FP16-NEXT: mov v0.h[3], w8 +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-FP16-NEXT: ret %x = call <4 x i8> @llvm.fptosi.sat.v4f16.v4i8(<4 x half> %f) ret <4 x i8> %x } define <4 x i13> @test_signed_v4f16_v4i13(<4 x half> %f) { -; CHECK-LABEL: test_signed_v4f16_v4i13: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: mov w8, #-981467136 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #61440 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: movk w8, #17791, lsl #16 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmaxnm s7, s3, s2 -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s7, s7, s5 -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fminnm s1, s1, s5 -; CHECK-NEXT: fcvtzs w9, s7 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fmaxnm s2, s4, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s5 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v4f16_v4i13: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov w8, #4095 +; CHECK-CVT-NEXT: mov w11, #-4096 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: fcvtzs w9, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: cmp w9, #4095 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w9, w9, w11, gt +; CHECK-CVT-NEXT: cmp w10, #4095 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: fcvtzs w12, s1 +; CHECK-CVT-NEXT: csel w10, w10, w11, gt +; CHECK-CVT-NEXT: fcvt s1, h0 +; CHECK-CVT-NEXT: cmp w12, #4095 +; CHECK-CVT-NEXT: fmov s0, w10 +; CHECK-CVT-NEXT: csel w10, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s1 +; CHECK-CVT-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov v0.h[1], w9 +; CHECK-CVT-NEXT: csel w9, w10, w11, gt +; CHECK-CVT-NEXT: cmp w12, #4095 +; CHECK-CVT-NEXT: csel w8, w12, w8, lt +; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov v0.h[2], w9 +; CHECK-CVT-NEXT: csel w8, w8, w11, gt +; CHECK-CVT-NEXT: mov v0.h[3], w8 +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v4f16_v4i13: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: mov w8, #4095 +; CHECK-FP16-NEXT: fcvtzs w10, h0 +; CHECK-FP16-NEXT: mov w11, #-4096 +; CHECK-FP16-NEXT: fcvtzs w9, h1 +; CHECK-FP16-NEXT: mov h1, v0.h[2] +; CHECK-FP16-NEXT: cmp w9, #4095 +; CHECK-FP16-NEXT: csel w9, w9, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h1 +; CHECK-FP16-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: mov h1, v0.h[3] +; CHECK-FP16-NEXT: csel w9, w9, w11, gt +; CHECK-FP16-NEXT: cmp w10, #4095 +; CHECK-FP16-NEXT: csel w10, w10, w8, lt +; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: csel w10, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, #4095 +; CHECK-FP16-NEXT: fmov s0, w10 +; CHECK-FP16-NEXT: csel w10, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h1 +; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: mov v0.h[1], w9 +; CHECK-FP16-NEXT: csel w9, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, #4095 +; CHECK-FP16-NEXT: csel w8, w12, w8, lt +; CHECK-FP16-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: mov v0.h[2], w9 +; CHECK-FP16-NEXT: csel w8, w8, w11, gt +; CHECK-FP16-NEXT: mov v0.h[3], w8 +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-FP16-NEXT: ret %x = call <4 x i13> @llvm.fptosi.sat.v4f16.v4i13(<4 x half> %f) ret <4 x i13> %x } @@ -2119,37 +2152,34 @@ ; CHECK-CVT-LABEL: test_signed_v4f16_v4i16: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #-956301312 -; CHECK-CVT-NEXT: fmov s2, w8 -; CHECK-CVT-NEXT: mov w8, #65024 +; CHECK-CVT-NEXT: mov w8, #32767 +; CHECK-CVT-NEXT: mov w11, #-32768 ; CHECK-CVT-NEXT: mov s1, v0.s[1] -; CHECK-CVT-NEXT: movk w8, #18175, lsl #16 -; CHECK-CVT-NEXT: mov s6, v0.s[2] -; CHECK-CVT-NEXT: fmaxnm s5, s0, s2 -; CHECK-CVT-NEXT: fmov s4, w8 -; CHECK-CVT-NEXT: fmaxnm s3, s1, s2 -; CHECK-CVT-NEXT: fcmp s1, s1 -; CHECK-CVT-NEXT: fmaxnm s1, s6, s2 -; CHECK-CVT-NEXT: fminnm s5, s5, s4 -; CHECK-CVT-NEXT: fminnm s3, s3, s4 -; CHECK-CVT-NEXT: fminnm s1, s1, s4 -; CHECK-CVT-NEXT: fcvtzs w9, s5 -; CHECK-CVT-NEXT: fcvtzs w8, s3 -; CHECK-CVT-NEXT: mov s3, v0.s[3] -; CHECK-CVT-NEXT: csel w8, wzr, w8, vs -; CHECK-CVT-NEXT: fcmp s0, s0 -; CHECK-CVT-NEXT: fmaxnm s2, s3, s2 -; CHECK-CVT-NEXT: csel w9, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s6, s6 -; CHECK-CVT-NEXT: fmov s0, w9 -; CHECK-CVT-NEXT: fcvtzs w9, s1 -; CHECK-CVT-NEXT: fminnm s1, s2, s4 -; CHECK-CVT-NEXT: mov v0.h[1], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s3, s3 +; CHECK-CVT-NEXT: fcvtzs w10, s0 ; CHECK-CVT-NEXT: fcvtzs w9, s1 -; CHECK-CVT-NEXT: mov v0.h[2], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s1 +; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s1, v0.s[3] +; CHECK-CVT-NEXT: csel w9, w9, w11, gt +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w10, w10, w11, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: fmov s0, w10 +; CHECK-CVT-NEXT: csel w10, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s1 +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v0.h[1], w9 +; CHECK-CVT-NEXT: csel w9, w10, w11, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w8, w12, w8, lt +; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v0.h[2], w9 +; CHECK-CVT-NEXT: csel w8, w8, w11, gt ; CHECK-CVT-NEXT: mov v0.h[3], w8 ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-CVT-NEXT: ret @@ -2163,46 +2193,77 @@ } define <4 x i19> @test_signed_v4f16_v4i19(<4 x half> %f) { -; CHECK-LABEL: test_signed_v4f16_v4i19: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: mov w8, #-931135488 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #65472 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: movk w8, #18559, lsl #16 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmaxnm s7, s3, s2 -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s1, s6, s2 -; CHECK-NEXT: fminnm s7, s7, s5 -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fminnm s1, s1, s5 -; CHECK-NEXT: fcvtzs w9, s7 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fmaxnm s2, s4, s2 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: fminnm s1, s2, s5 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v0.s[3], w8 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v4f16_v4i19: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov w8, #262143 +; CHECK-CVT-NEXT: mov w11, #-262144 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: fcvtzs w9, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: csel w9, w9, w11, gt +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: cmn w10, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: fcvtzs w12, s1 +; CHECK-CVT-NEXT: csel w10, w10, w11, gt +; CHECK-CVT-NEXT: fcvt s1, h0 +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: fmov s0, w10 +; CHECK-CVT-NEXT: csel w10, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s1 +; CHECK-CVT-NEXT: cmn w10, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: mov v0.s[1], w9 +; CHECK-CVT-NEXT: csel w9, w10, w11, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w8, w12, w8, lt +; CHECK-CVT-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: mov v0.s[2], w9 +; CHECK-CVT-NEXT: csel w8, w8, w11, gt +; CHECK-CVT-NEXT: mov v0.s[3], w8 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v4f16_v4i19: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: mov w8, #262143 +; CHECK-FP16-NEXT: fcvtzs w10, h0 +; CHECK-FP16-NEXT: mov w11, #-262144 +; CHECK-FP16-NEXT: fcvtzs w9, h1 +; CHECK-FP16-NEXT: mov h1, v0.h[2] +; CHECK-FP16-NEXT: cmp w9, w8 +; CHECK-FP16-NEXT: csel w9, w9, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h1 +; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: mov h1, v0.h[3] +; CHECK-FP16-NEXT: csel w9, w9, w11, gt +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w10, w10, w8, lt +; CHECK-FP16-NEXT: cmn w10, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: csel w10, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, w8 +; CHECK-FP16-NEXT: fmov s0, w10 +; CHECK-FP16-NEXT: csel w10, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h1 +; CHECK-FP16-NEXT: cmn w10, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: mov v0.s[1], w9 +; CHECK-FP16-NEXT: csel w9, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, w8 +; CHECK-FP16-NEXT: csel w8, w12, w8, lt +; CHECK-FP16-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: mov v0.s[2], w9 +; CHECK-FP16-NEXT: csel w8, w8, w11, gt +; CHECK-FP16-NEXT: mov v0.s[3], w8 +; CHECK-FP16-NEXT: ret %x = call <4 x i19> @llvm.fptosi.sat.v4f16.v4i19(<4 x half> %f) ret <4 x i19> %x } @@ -2235,51 +2296,69 @@ } define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) { -; CHECK-LABEL: test_signed_v4f16_v4i50: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-671088640 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: mov w9, #1476395007 -; CHECK-NEXT: mov h4, v0.h[1] -; CHECK-NEXT: mov x10, #562949953421311 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fmov s3, w9 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov x9, #-562949953421312 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcmp s1, s2 -; CHECK-NEXT: csel x8, x9, x8, lt -; CHECK-NEXT: fcmp s1, s3 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: mov h1, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x0, xzr, x8, vs -; CHECK-NEXT: fcmp s4, s2 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s4, s3 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs x11, s1 -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fcmp s1, s2 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s1, s3 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: csel x2, xzr, x8, vs -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fcmp s0, s2 -; CHECK-NEXT: csel x8, x9, x8, lt -; CHECK-NEXT: fcmp s0, s3 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel x3, xzr, x8, vs -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v4f16_v4i50: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: mov x8, #562949953421311 +; CHECK-CVT-NEXT: mov x11, #-562949953421312 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs x9, s2 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: cmp x9, x8 +; CHECK-CVT-NEXT: fcvtzs x10, s1 +; CHECK-CVT-NEXT: fcvt s1, h3 +; CHECK-CVT-NEXT: csel x9, x9, x8, lt +; CHECK-CVT-NEXT: cmp x9, x11 +; CHECK-CVT-NEXT: csel x0, x9, x11, gt +; CHECK-CVT-NEXT: cmp x10, x8 +; CHECK-CVT-NEXT: fcvtzs x9, s1 +; CHECK-CVT-NEXT: csel x10, x10, x8, lt +; CHECK-CVT-NEXT: cmp x10, x11 +; CHECK-CVT-NEXT: csel x1, x10, x11, gt +; CHECK-CVT-NEXT: fcvtzs x10, s0 +; CHECK-CVT-NEXT: cmp x9, x8 +; CHECK-CVT-NEXT: csel x9, x9, x8, lt +; CHECK-CVT-NEXT: cmp x9, x11 +; CHECK-CVT-NEXT: csel x2, x9, x11, gt +; CHECK-CVT-NEXT: cmp x10, x8 +; CHECK-CVT-NEXT: csel x8, x10, x8, lt +; CHECK-CVT-NEXT: cmp x8, x11 +; CHECK-CVT-NEXT: csel x3, x8, x11, gt +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v4f16_v4i50: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzs x9, h0 +; CHECK-FP16-NEXT: mov x8, #562949953421311 +; CHECK-FP16-NEXT: mov x11, #-562949953421312 +; CHECK-FP16-NEXT: cmp x9, x8 +; CHECK-FP16-NEXT: fcvtzs x10, h1 +; CHECK-FP16-NEXT: mov h1, v0.h[2] +; CHECK-FP16-NEXT: csel x9, x9, x8, lt +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x0, x9, x11, gt +; CHECK-FP16-NEXT: cmp x10, x8 +; CHECK-FP16-NEXT: fcvtzs x9, h1 +; CHECK-FP16-NEXT: csel x10, x10, x8, lt +; CHECK-FP16-NEXT: cmp x10, x11 +; CHECK-FP16-NEXT: csel x1, x10, x11, gt +; CHECK-FP16-NEXT: fcvtzs x10, h0 +; CHECK-FP16-NEXT: cmp x9, x8 +; CHECK-FP16-NEXT: csel x9, x9, x8, lt +; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x2, x9, x11, gt +; CHECK-FP16-NEXT: cmp x10, x8 +; CHECK-FP16-NEXT: csel x8, x10, x8, lt +; CHECK-FP16-NEXT: cmp x8, x11 +; CHECK-FP16-NEXT: csel x3, x8, x11, gt +; CHECK-FP16-NEXT: ret %x = call <4 x i50> @llvm.fptosi.sat.v4f16.v4i50(<4 x half> %f) ret <4 x i50> %x } @@ -2555,213 +2634,259 @@ declare <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half>) define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: fmov s2, #-1.00000000 -; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: movi d3, #0000000000000000 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h7, v0.h[3] -; CHECK-NEXT: mov h17, v0.h[4] -; CHECK-NEXT: mov h18, v0.h[6] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fmaxnm s16, s4, s2 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: fmaxnm s5, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fminnm s16, s16, s3 -; CHECK-NEXT: mov h1, v0.h[5] -; CHECK-NEXT: mov h0, v0.h[7] -; CHECK-NEXT: fminnm s5, s5, s3 -; CHECK-NEXT: fcvtzs w9, s16 -; CHECK-NEXT: fmaxnm s16, s7, s2 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvtzs w8, s5 -; CHECK-NEXT: fmaxnm s5, s6, s2 -; CHECK-NEXT: fminnm s16, s16, s3 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fminnm s4, s5, s3 -; CHECK-NEXT: fcvt s5, h17 -; CHECK-NEXT: fcvt s17, h1 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fcvtzs w10, s4 -; CHECK-NEXT: fmaxnm s4, s5, s2 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: fcvtzs w9, s16 -; CHECK-NEXT: fmaxnm s6, s17, s2 -; CHECK-NEXT: mov v1.b[1], w8 -; CHECK-NEXT: csel w8, wzr, w10, vs -; CHECK-NEXT: fcmp s7, s7 -; CHECK-NEXT: fcvt s7, h18 -; CHECK-NEXT: fminnm s4, s4, s3 -; CHECK-NEXT: fminnm s6, s6, s3 -; CHECK-NEXT: mov v1.b[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s5, s5 -; CHECK-NEXT: fcvtzs w9, s4 -; CHECK-NEXT: fmaxnm s4, s7, s2 -; CHECK-NEXT: fcvtzs w10, s6 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: mov v1.b[3], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s17, s17 -; CHECK-NEXT: fminnm s4, s4, s3 -; CHECK-NEXT: fminnm s2, s2, s3 -; CHECK-NEXT: mov v1.b[4], w8 -; CHECK-NEXT: csel w8, wzr, w10, vs -; CHECK-NEXT: fcmp s7, s7 -; CHECK-NEXT: fcvtzs w9, s4 -; CHECK-NEXT: mov v1.b[5], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcvtzs w9, s2 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: mov v1.b[6], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v1.b[7], w8 -; CHECK-NEXT: fmov d0, d1 -; CHECK-NEXT: ret - %x = call <8 x i1> @llvm.fptosi.sat.v8f16.v8i1(<8 x half> %f) - ret <8 x i1> %x -} - -define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v8f16_v8i8: +; CHECK-CVT-LABEL: test_signed_v8f16_v8i1: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v3.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #-1023410176 -; CHECK-CVT-NEXT: fmov s1, w8 -; CHECK-CVT-NEXT: mov w8, #1123942400 -; CHECK-CVT-NEXT: mov s4, v3.s[1] -; CHECK-CVT-NEXT: mov s7, v3.s[2] -; CHECK-CVT-NEXT: mov s16, v3.s[3] -; CHECK-CVT-NEXT: fmov s2, w8 -; CHECK-CVT-NEXT: fmaxnm s6, s3, s1 -; CHECK-CVT-NEXT: fmaxnm s5, s4, s1 -; CHECK-CVT-NEXT: fcmp s4, s4 -; CHECK-CVT-NEXT: fcvtl2 v4.4s, v0.8h -; CHECK-CVT-NEXT: fmaxnm s0, s7, s1 -; CHECK-CVT-NEXT: fminnm s6, s6, s2 -; CHECK-CVT-NEXT: fminnm s5, s5, s2 -; CHECK-CVT-NEXT: fminnm s0, s0, s2 -; CHECK-CVT-NEXT: fcvtzs w9, s6 -; CHECK-CVT-NEXT: fmaxnm s6, s4, s1 -; CHECK-CVT-NEXT: fcvtzs w8, s5 -; CHECK-CVT-NEXT: fmaxnm s5, s16, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov h3, v0.h[4] +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: mov h2, v0.h[3] +; CHECK-CVT-NEXT: fcvtzs w8, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: csel w8, w8, wzr, lt +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: csel w9, w9, wzr, lt +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: fcvtzs w10, s1 +; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge +; CHECK-CVT-NEXT: cmp w10, #0 +; CHECK-CVT-NEXT: fmov s1, w9 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: csel w10, w10, wzr, lt +; CHECK-CVT-NEXT: mov v1.b[1], w8 +; CHECK-CVT-NEXT: cmp w10, #0 +; CHECK-CVT-NEXT: csinv w8, w10, wzr, ge +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: csel w9, w9, wzr, lt +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: mov v1.b[2], w8 +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge +; CHECK-CVT-NEXT: cmp w10, #0 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: csel w10, w10, wzr, lt +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v1.b[3], w8 +; CHECK-CVT-NEXT: cmp w10, #0 +; CHECK-CVT-NEXT: csinv w8, w10, wzr, ge +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: csel w9, w9, wzr, lt +; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov v1.b[4], w8 +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge +; CHECK-CVT-NEXT: cmp w10, #0 +; CHECK-CVT-NEXT: csel w9, w10, wzr, lt ; CHECK-CVT-NEXT: fcvtzs w10, s0 -; CHECK-CVT-NEXT: fminnm s6, s6, s2 -; CHECK-CVT-NEXT: csel w8, wzr, w8, vs -; CHECK-CVT-NEXT: fcmp s3, s3 -; CHECK-CVT-NEXT: mov s3, v4.s[1] -; CHECK-CVT-NEXT: fminnm s5, s5, s2 -; CHECK-CVT-NEXT: csel w9, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s7, s7 -; CHECK-CVT-NEXT: mov s7, v4.s[2] -; CHECK-CVT-NEXT: fmov s0, w9 -; CHECK-CVT-NEXT: fcvtzs w9, s5 -; CHECK-CVT-NEXT: fmaxnm s5, s3, s1 -; CHECK-CVT-NEXT: mov v0.b[1], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w10, vs -; CHECK-CVT-NEXT: fcmp s16, s16 -; CHECK-CVT-NEXT: fminnm s5, s5, s2 -; CHECK-CVT-NEXT: mov v0.b[2], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcvtzs w9, s6 -; CHECK-CVT-NEXT: fmaxnm s6, s7, s1 -; CHECK-CVT-NEXT: fcmp s4, s4 -; CHECK-CVT-NEXT: mov s4, v4.s[3] -; CHECK-CVT-NEXT: fcvtzs w10, s5 -; CHECK-CVT-NEXT: mov v0.b[3], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s3, s3 -; CHECK-CVT-NEXT: fminnm s5, s6, s2 -; CHECK-CVT-NEXT: fmaxnm s1, s4, s1 -; CHECK-CVT-NEXT: mov v0.b[4], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w10, vs -; CHECK-CVT-NEXT: fcmp s7, s7 -; CHECK-CVT-NEXT: fcvtzs w9, s5 -; CHECK-CVT-NEXT: fminnm s1, s1, s2 -; CHECK-CVT-NEXT: mov v0.b[5], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s4, s4 -; CHECK-CVT-NEXT: fcvtzs w9, s1 -; CHECK-CVT-NEXT: mov v0.b[6], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: mov v0.b[7], w8 -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-NEXT: mov v1.b[5], w8 +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge +; CHECK-CVT-NEXT: cmp w10, #0 +; CHECK-CVT-NEXT: csel w9, w10, wzr, lt +; CHECK-CVT-NEXT: mov v1.b[6], w8 +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge +; CHECK-CVT-NEXT: mov v1.b[7], w8 +; CHECK-CVT-NEXT: fmov d0, d1 ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: test_signed_v8f16_v8i8: +; CHECK-FP16-LABEL: test_signed_v8f16_v8i1: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: mov w8, #-1023410176 -; CHECK-FP16-NEXT: fcvt s4, h0 -; CHECK-FP16-NEXT: mov h6, v0.h[2] -; CHECK-FP16-NEXT: mov h7, v0.h[3] -; CHECK-FP16-NEXT: mov h17, v0.h[4] -; CHECK-FP16-NEXT: fmov s2, w8 -; CHECK-FP16-NEXT: mov w8, #1123942400 -; CHECK-FP16-NEXT: fcvt s1, h1 -; CHECK-FP16-NEXT: mov h18, v0.h[6] -; CHECK-FP16-NEXT: fcvt s6, h6 -; CHECK-FP16-NEXT: fmov s3, w8 -; CHECK-FP16-NEXT: fmaxnm s16, s4, s2 -; CHECK-FP16-NEXT: fcvt s7, h7 -; CHECK-FP16-NEXT: fmaxnm s5, s1, s2 -; CHECK-FP16-NEXT: fcmp s1, s1 -; CHECK-FP16-NEXT: mov h1, v0.h[5] -; CHECK-FP16-NEXT: mov h0, v0.h[7] -; CHECK-FP16-NEXT: fminnm s16, s16, s3 -; CHECK-FP16-NEXT: fminnm s5, s5, s3 -; CHECK-FP16-NEXT: fcvt s0, h0 -; CHECK-FP16-NEXT: fcvtzs w9, s16 -; CHECK-FP16-NEXT: fmaxnm s16, s7, s2 -; CHECK-FP16-NEXT: fcvtzs w8, s5 -; CHECK-FP16-NEXT: fmaxnm s5, s6, s2 -; CHECK-FP16-NEXT: fminnm s16, s16, s3 -; CHECK-FP16-NEXT: csel w8, wzr, w8, vs -; CHECK-FP16-NEXT: fcmp s4, s4 -; CHECK-FP16-NEXT: fminnm s4, s5, s3 -; CHECK-FP16-NEXT: fcvt s5, h17 -; CHECK-FP16-NEXT: fcvt s17, h1 -; CHECK-FP16-NEXT: csel w9, wzr, w9, vs -; CHECK-FP16-NEXT: fcmp s6, s6 -; CHECK-FP16-NEXT: fcvtzs w10, s4 -; CHECK-FP16-NEXT: fmaxnm s4, s5, s2 +; CHECK-FP16-NEXT: fcvtzs w9, h0 +; CHECK-FP16-NEXT: mov h2, v0.h[3] +; CHECK-FP16-NEXT: mov h3, v0.h[5] +; CHECK-FP16-NEXT: fcvtzs w8, h1 +; CHECK-FP16-NEXT: mov h1, v0.h[2] +; CHECK-FP16-NEXT: cmp w8, #0 +; CHECK-FP16-NEXT: csel w8, w8, wzr, lt +; CHECK-FP16-NEXT: fcvtzs w10, h1 +; CHECK-FP16-NEXT: cmp w8, #0 +; CHECK-FP16-NEXT: csinv w8, w8, wzr, ge +; CHECK-FP16-NEXT: cmp w9, #0 +; CHECK-FP16-NEXT: csel w9, w9, wzr, lt +; CHECK-FP16-NEXT: cmp w9, #0 +; CHECK-FP16-NEXT: csinv w9, w9, wzr, ge +; CHECK-FP16-NEXT: cmp w10, #0 ; CHECK-FP16-NEXT: fmov s1, w9 -; CHECK-FP16-NEXT: fcvtzs w9, s16 -; CHECK-FP16-NEXT: fmaxnm s6, s17, s2 +; CHECK-FP16-NEXT: csel w9, w10, wzr, lt +; CHECK-FP16-NEXT: fcvtzs w10, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[4] +; CHECK-FP16-NEXT: cmp w9, #0 ; CHECK-FP16-NEXT: mov v1.b[1], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w10, vs -; CHECK-FP16-NEXT: fcmp s7, s7 -; CHECK-FP16-NEXT: fcvt s7, h18 -; CHECK-FP16-NEXT: fminnm s4, s4, s3 -; CHECK-FP16-NEXT: fminnm s6, s6, s3 +; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge +; CHECK-FP16-NEXT: cmp w10, #0 +; CHECK-FP16-NEXT: fcvtzs w9, h2 +; CHECK-FP16-NEXT: csel w10, w10, wzr, lt +; CHECK-FP16-NEXT: cmp w10, #0 +; CHECK-FP16-NEXT: mov h2, v0.h[6] ; CHECK-FP16-NEXT: mov v1.b[2], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w9, vs -; CHECK-FP16-NEXT: fcmp s5, s5 -; CHECK-FP16-NEXT: fcvtzs w9, s4 -; CHECK-FP16-NEXT: fmaxnm s4, s7, s2 -; CHECK-FP16-NEXT: fcvtzs w10, s6 -; CHECK-FP16-NEXT: fmaxnm s2, s0, s2 +; CHECK-FP16-NEXT: csinv w8, w10, wzr, ge +; CHECK-FP16-NEXT: cmp w9, #0 +; CHECK-FP16-NEXT: fcvtzs w10, h3 +; CHECK-FP16-NEXT: csel w9, w9, wzr, lt +; CHECK-FP16-NEXT: mov h0, v0.h[7] +; CHECK-FP16-NEXT: cmp w9, #0 ; CHECK-FP16-NEXT: mov v1.b[3], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w9, vs -; CHECK-FP16-NEXT: fcmp s17, s17 -; CHECK-FP16-NEXT: fminnm s4, s4, s3 -; CHECK-FP16-NEXT: fminnm s2, s2, s3 +; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge +; CHECK-FP16-NEXT: cmp w10, #0 +; CHECK-FP16-NEXT: csel w9, w10, wzr, lt +; CHECK-FP16-NEXT: fcvtzs w10, h2 +; CHECK-FP16-NEXT: cmp w9, #0 ; CHECK-FP16-NEXT: mov v1.b[4], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w10, vs -; CHECK-FP16-NEXT: fcmp s7, s7 -; CHECK-FP16-NEXT: fcvtzs w9, s4 +; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge +; CHECK-FP16-NEXT: cmp w10, #0 +; CHECK-FP16-NEXT: csel w9, w10, wzr, lt +; CHECK-FP16-NEXT: fcvtzs w10, h0 +; CHECK-FP16-NEXT: cmp w9, #0 ; CHECK-FP16-NEXT: mov v1.b[5], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w9, vs -; CHECK-FP16-NEXT: fcvtzs w9, s2 -; CHECK-FP16-NEXT: fcmp s0, s0 +; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge +; CHECK-FP16-NEXT: cmp w10, #0 +; CHECK-FP16-NEXT: csel w9, w10, wzr, lt +; CHECK-FP16-NEXT: cmp w9, #0 ; CHECK-FP16-NEXT: mov v1.b[6], w8 -; CHECK-FP16-NEXT: csel w8, wzr, w9, vs +; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge +; CHECK-FP16-NEXT: mov v1.b[7], w8 +; CHECK-FP16-NEXT: fmov d0, d1 +; CHECK-FP16-NEXT: ret + %x = call <8 x i1> @llvm.fptosi.sat.v8f16.v8i1(<8 x half> %f) + ret <8 x i1> %x +} + +define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) { +; CHECK-CVT-LABEL: test_signed_v8f16_v8i8: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-NEXT: mov w8, #127 +; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: fcvtzs w10, s1 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: cmp w9, #127 +; CHECK-CVT-NEXT: csel w11, w9, w8, lt +; CHECK-CVT-NEXT: mov w9, #-128 +; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: csel w11, w11, w9, gt +; CHECK-CVT-NEXT: cmp w10, #127 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: mov s2, v1.s[3] +; CHECK-CVT-NEXT: cmn w10, #128 +; CHECK-CVT-NEXT: csel w10, w10, w9, gt +; CHECK-CVT-NEXT: cmp w12, #127 +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: csel w10, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmn w10, #128 +; CHECK-CVT-NEXT: csel w10, w10, w9, gt +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: mov v1.b[1], w11 +; CHECK-CVT-NEXT: cmp w12, #127 +; CHECK-CVT-NEXT: csel w11, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s0 +; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: mov v1.b[2], w10 +; CHECK-CVT-NEXT: csel w10, w11, w9, gt +; CHECK-CVT-NEXT: cmp w12, #127 +; CHECK-CVT-NEXT: fcvtzs w11, s2 +; CHECK-CVT-NEXT: csel w12, w12, w8, lt +; CHECK-CVT-NEXT: mov s2, v0.s[2] +; CHECK-CVT-NEXT: cmn w12, #128 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: mov v1.b[3], w10 +; CHECK-CVT-NEXT: csel w10, w12, w9, gt +; CHECK-CVT-NEXT: cmp w11, #127 +; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: mov v1.b[4], w10 +; CHECK-CVT-NEXT: csel w10, w11, w9, gt +; CHECK-CVT-NEXT: cmp w12, #127 +; CHECK-CVT-NEXT: csel w11, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s0 +; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: mov v1.b[5], w10 +; CHECK-CVT-NEXT: csel w10, w11, w9, gt +; CHECK-CVT-NEXT: cmp w12, #127 +; CHECK-CVT-NEXT: csel w8, w12, w8, lt +; CHECK-CVT-NEXT: cmn w8, #128 +; CHECK-CVT-NEXT: mov v1.b[6], w10 +; CHECK-CVT-NEXT: csel w8, w8, w9, gt +; CHECK-CVT-NEXT: mov v1.b[7], w8 +; CHECK-CVT-NEXT: fmov d0, d1 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v8f16_v8i8: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: mov w8, #127 +; CHECK-FP16-NEXT: fcvtzs w10, h0 +; CHECK-FP16-NEXT: mov h2, v0.h[3] +; CHECK-FP16-NEXT: mov h3, v0.h[5] +; CHECK-FP16-NEXT: fcvtzs w9, h1 +; CHECK-FP16-NEXT: mov h1, v0.h[2] +; CHECK-FP16-NEXT: cmp w9, #127 +; CHECK-FP16-NEXT: csel w11, w9, w8, lt +; CHECK-FP16-NEXT: mov w9, #-128 +; CHECK-FP16-NEXT: cmn w11, #128 +; CHECK-FP16-NEXT: fcvtzs w12, h1 +; CHECK-FP16-NEXT: csel w11, w11, w9, gt +; CHECK-FP16-NEXT: cmp w10, #127 +; CHECK-FP16-NEXT: csel w10, w10, w8, lt +; CHECK-FP16-NEXT: cmn w10, #128 +; CHECK-FP16-NEXT: csel w10, w10, w9, gt +; CHECK-FP16-NEXT: cmp w12, #127 +; CHECK-FP16-NEXT: fmov s1, w10 +; CHECK-FP16-NEXT: csel w10, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[4] +; CHECK-FP16-NEXT: cmn w10, #128 +; CHECK-FP16-NEXT: mov v1.b[1], w11 +; CHECK-FP16-NEXT: csel w10, w10, w9, gt +; CHECK-FP16-NEXT: cmp w12, #127 +; CHECK-FP16-NEXT: fcvtzs w11, h2 +; CHECK-FP16-NEXT: csel w12, w12, w8, lt +; CHECK-FP16-NEXT: cmn w12, #128 +; CHECK-FP16-NEXT: mov h2, v0.h[6] +; CHECK-FP16-NEXT: mov v1.b[2], w10 +; CHECK-FP16-NEXT: csel w10, w12, w9, gt +; CHECK-FP16-NEXT: cmp w11, #127 +; CHECK-FP16-NEXT: fcvtzs w12, h3 +; CHECK-FP16-NEXT: csel w11, w11, w8, lt +; CHECK-FP16-NEXT: mov h0, v0.h[7] +; CHECK-FP16-NEXT: cmn w11, #128 +; CHECK-FP16-NEXT: mov v1.b[3], w10 +; CHECK-FP16-NEXT: csel w10, w11, w9, gt +; CHECK-FP16-NEXT: cmp w12, #127 +; CHECK-FP16-NEXT: csel w11, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h2 +; CHECK-FP16-NEXT: cmn w11, #128 +; CHECK-FP16-NEXT: mov v1.b[4], w10 +; CHECK-FP16-NEXT: csel w10, w11, w9, gt +; CHECK-FP16-NEXT: cmp w12, #127 +; CHECK-FP16-NEXT: csel w11, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h0 +; CHECK-FP16-NEXT: cmn w11, #128 +; CHECK-FP16-NEXT: mov v1.b[5], w10 +; CHECK-FP16-NEXT: csel w10, w11, w9, gt +; CHECK-FP16-NEXT: cmp w12, #127 +; CHECK-FP16-NEXT: csel w8, w12, w8, lt +; CHECK-FP16-NEXT: cmn w8, #128 +; CHECK-FP16-NEXT: mov v1.b[6], w10 +; CHECK-FP16-NEXT: csel w8, w8, w9, gt ; CHECK-FP16-NEXT: mov v1.b[7], w8 ; CHECK-FP16-NEXT: fmov d0, d1 ; CHECK-FP16-NEXT: ret @@ -2770,78 +2895,137 @@ } define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i13: -; CHECK: // %bb.0: -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: mov w8, #-981467136 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov h6, v0.h[2] -; CHECK-NEXT: mov h7, v0.h[3] -; CHECK-NEXT: mov h17, v0.h[4] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, #61440 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: movk w8, #17791, lsl #16 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: mov h18, v0.h[6] -; CHECK-NEXT: fmaxnm s16, s3, s2 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: fmaxnm s4, s1, s2 -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: mov h1, v0.h[5] -; CHECK-NEXT: mov h0, v0.h[7] -; CHECK-NEXT: fminnm s16, s16, s5 -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvtzs w9, s16 -; CHECK-NEXT: fmaxnm s16, s7, s2 -; CHECK-NEXT: fcvtzs w8, s4 -; CHECK-NEXT: fmaxnm s4, s6, s2 -; CHECK-NEXT: fminnm s16, s16, s5 -; CHECK-NEXT: csel w8, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fminnm s3, s4, s5 -; CHECK-NEXT: fcvt s4, h17 -; CHECK-NEXT: fcvt s17, h1 -; CHECK-NEXT: csel w9, wzr, w9, vs -; CHECK-NEXT: fcmp s6, s6 -; CHECK-NEXT: fcvtzs w10, s3 -; CHECK-NEXT: fmaxnm s3, s4, s2 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: fcvtzs w9, s16 -; CHECK-NEXT: fmaxnm s6, s17, s2 -; CHECK-NEXT: mov v1.h[1], w8 -; CHECK-NEXT: csel w8, wzr, w10, vs -; CHECK-NEXT: fcmp s7, s7 -; CHECK-NEXT: fcvt s7, h18 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fminnm s6, s6, s5 -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs w9, s3 -; CHECK-NEXT: fmaxnm s3, s7, s2 -; CHECK-NEXT: fcvtzs w10, s6 -; CHECK-NEXT: fmaxnm s2, s0, s2 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcmp s17, s17 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fminnm s2, s2, s5 -; CHECK-NEXT: mov v1.h[4], w8 -; CHECK-NEXT: csel w8, wzr, w10, vs -; CHECK-NEXT: fcmp s7, s7 -; CHECK-NEXT: fcvtzs w9, s3 -; CHECK-NEXT: mov v1.h[5], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: fcvtzs w9, s2 -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: mov v1.h[6], w8 -; CHECK-NEXT: csel w8, wzr, w9, vs -; CHECK-NEXT: mov v1.h[7], w8 -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v8f16_v8i13: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov w9, #4095 +; CHECK-CVT-NEXT: mov w10, #-4096 +; CHECK-CVT-NEXT: mov h3, v0.h[4] +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs w11, s2 +; CHECK-CVT-NEXT: mov h2, v0.h[3] +; CHECK-CVT-NEXT: fcvtzs w8, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: cmp w8, #4095 +; CHECK-CVT-NEXT: csel w8, w8, w9, lt +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: cmp w11, #4095 +; CHECK-CVT-NEXT: csel w11, w11, w9, lt +; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: fcvtzs w12, s1 +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmp w12, #4095 +; CHECK-CVT-NEXT: fmov s1, w11 +; CHECK-CVT-NEXT: fcvtzs w11, s2 +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: csel w12, w12, w9, lt +; CHECK-CVT-NEXT: mov v1.h[1], w8 +; CHECK-CVT-NEXT: cmn w12, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w8, w12, w10, gt +; CHECK-CVT-NEXT: cmp w11, #4095 +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: csel w11, w11, w9, lt +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: mov v1.h[2], w8 +; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w8, w11, w10, gt +; CHECK-CVT-NEXT: cmp w12, #4095 +; CHECK-CVT-NEXT: fcvtzs w11, s2 +; CHECK-CVT-NEXT: csel w12, w12, w9, lt +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v1.h[3], w8 +; CHECK-CVT-NEXT: cmn w12, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w8, w12, w10, gt +; CHECK-CVT-NEXT: cmp w11, #4095 +; CHECK-CVT-NEXT: csel w11, w11, w9, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov v1.h[4], w8 +; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w8, w11, w10, gt +; CHECK-CVT-NEXT: cmp w12, #4095 +; CHECK-CVT-NEXT: csel w11, w12, w9, lt +; CHECK-CVT-NEXT: fcvtzs w12, s0 +; CHECK-CVT-NEXT: mov v1.h[5], w8 +; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w8, w11, w10, gt +; CHECK-CVT-NEXT: cmp w12, #4095 +; CHECK-CVT-NEXT: csel w9, w12, w9, lt +; CHECK-CVT-NEXT: mov v1.h[6], w8 +; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w8, w9, w10, gt +; CHECK-CVT-NEXT: mov v1.h[7], w8 +; CHECK-CVT-NEXT: mov v0.16b, v1.16b +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v8f16_v8i13: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: mov w8, #4095 +; CHECK-FP16-NEXT: fcvtzs w10, h0 +; CHECK-FP16-NEXT: mov w11, #-4096 +; CHECK-FP16-NEXT: mov h2, v0.h[3] +; CHECK-FP16-NEXT: mov h3, v0.h[5] +; CHECK-FP16-NEXT: fcvtzs w9, h1 +; CHECK-FP16-NEXT: mov h1, v0.h[2] +; CHECK-FP16-NEXT: cmp w9, #4095 +; CHECK-FP16-NEXT: csel w9, w9, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h1 +; CHECK-FP16-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: csel w9, w9, w11, gt +; CHECK-FP16-NEXT: cmp w10, #4095 +; CHECK-FP16-NEXT: csel w10, w10, w8, lt +; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: csel w10, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, #4095 +; CHECK-FP16-NEXT: fmov s1, w10 +; CHECK-FP16-NEXT: csel w10, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[4] +; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: mov v1.h[1], w9 +; CHECK-FP16-NEXT: csel w9, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, #4095 +; CHECK-FP16-NEXT: fcvtzs w10, h2 +; CHECK-FP16-NEXT: csel w12, w12, w8, lt +; CHECK-FP16-NEXT: cmn w12, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: mov h2, v0.h[6] +; CHECK-FP16-NEXT: mov v1.h[2], w9 +; CHECK-FP16-NEXT: csel w9, w12, w11, gt +; CHECK-FP16-NEXT: cmp w10, #4095 +; CHECK-FP16-NEXT: fcvtzs w12, h3 +; CHECK-FP16-NEXT: csel w10, w10, w8, lt +; CHECK-FP16-NEXT: mov h0, v0.h[7] +; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: mov v1.h[3], w9 +; CHECK-FP16-NEXT: csel w9, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, #4095 +; CHECK-FP16-NEXT: csel w10, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h2 +; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: mov v1.h[4], w9 +; CHECK-FP16-NEXT: csel w9, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, #4095 +; CHECK-FP16-NEXT: csel w10, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h0 +; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: mov v1.h[5], w9 +; CHECK-FP16-NEXT: csel w9, w10, w11, gt +; CHECK-FP16-NEXT: cmp w12, #4095 +; CHECK-FP16-NEXT: csel w8, w12, w8, lt +; CHECK-FP16-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-FP16-NEXT: mov v1.h[6], w9 +; CHECK-FP16-NEXT: csel w8, w8, w11, gt +; CHECK-FP16-NEXT: mov v1.h[7], w8 +; CHECK-FP16-NEXT: mov v0.16b, v1.16b +; CHECK-FP16-NEXT: ret %x = call <8 x i13> @llvm.fptosi.sat.v8f16.v8i13(<8 x half> %f) ret <8 x i13> %x } @@ -2850,66 +3034,64 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i16: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #-956301312 -; CHECK-CVT-NEXT: fmov s3, w8 -; CHECK-CVT-NEXT: mov w8, #65024 +; CHECK-CVT-NEXT: mov w8, #32767 +; CHECK-CVT-NEXT: mov w11, #-32768 +; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-CVT-NEXT: mov s2, v1.s[1] -; CHECK-CVT-NEXT: movk w8, #18175, lsl #16 -; CHECK-CVT-NEXT: mov s7, v1.s[2] -; CHECK-CVT-NEXT: mov s16, v1.s[3] -; CHECK-CVT-NEXT: fmaxnm s6, s1, s3 -; CHECK-CVT-NEXT: fmov s5, w8 -; CHECK-CVT-NEXT: fmaxnm s4, s2, s3 -; CHECK-CVT-NEXT: fcmp s2, s2 -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-CVT-NEXT: fmaxnm s0, s7, s3 -; CHECK-CVT-NEXT: fminnm s6, s6, s5 -; CHECK-CVT-NEXT: fminnm s4, s4, s5 -; CHECK-CVT-NEXT: fminnm s0, s0, s5 -; CHECK-CVT-NEXT: fcvtzs w9, s6 -; CHECK-CVT-NEXT: fmaxnm s6, s2, s3 -; CHECK-CVT-NEXT: fcvtzs w8, s4 -; CHECK-CVT-NEXT: fmaxnm s4, s16, s3 -; CHECK-CVT-NEXT: fcvtzs w10, s0 -; CHECK-CVT-NEXT: fminnm s6, s6, s5 -; CHECK-CVT-NEXT: csel w8, wzr, w8, vs -; CHECK-CVT-NEXT: fcmp s1, s1 -; CHECK-CVT-NEXT: mov s1, v2.s[1] -; CHECK-CVT-NEXT: fminnm s4, s4, s5 -; CHECK-CVT-NEXT: csel w9, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s7, s7 -; CHECK-CVT-NEXT: mov s7, v2.s[2] -; CHECK-CVT-NEXT: fmov s0, w9 -; CHECK-CVT-NEXT: fcvtzs w9, s4 -; CHECK-CVT-NEXT: fmaxnm s4, s1, s3 -; CHECK-CVT-NEXT: mov v0.h[1], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w10, vs -; CHECK-CVT-NEXT: fcmp s16, s16 -; CHECK-CVT-NEXT: fminnm s4, s4, s5 -; CHECK-CVT-NEXT: mov v0.h[2], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcvtzs w9, s6 -; CHECK-CVT-NEXT: fmaxnm s6, s7, s3 -; CHECK-CVT-NEXT: fcmp s2, s2 -; CHECK-CVT-NEXT: mov s2, v2.s[3] -; CHECK-CVT-NEXT: fcvtzs w10, s4 -; CHECK-CVT-NEXT: mov v0.h[3], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s1, s1 -; CHECK-CVT-NEXT: fminnm s4, s6, s5 -; CHECK-CVT-NEXT: fmaxnm s1, s2, s3 -; CHECK-CVT-NEXT: mov v0.h[4], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w10, vs -; CHECK-CVT-NEXT: fcmp s7, s7 -; CHECK-CVT-NEXT: fcvtzs w9, s4 -; CHECK-CVT-NEXT: fminnm s1, s1, s5 -; CHECK-CVT-NEXT: mov v0.h[5], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: fcmp s2, s2 -; CHECK-CVT-NEXT: fcvtzs w9, s1 -; CHECK-CVT-NEXT: mov v0.h[6], w8 -; CHECK-CVT-NEXT: csel w8, wzr, w9, vs -; CHECK-CVT-NEXT: mov v0.h[7], w8 +; CHECK-CVT-NEXT: fcvtzs w10, s1 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s2, v1.s[3] +; CHECK-CVT-NEXT: csel w9, w9, w11, gt +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w10, w10, w11, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: csel w10, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: mov v1.h[1], w9 +; CHECK-CVT-NEXT: csel w9, w10, w11, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w10, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s0 +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v1.h[2], w9 +; CHECK-CVT-NEXT: csel w9, w10, w11, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: csel w12, w12, w8, lt +; CHECK-CVT-NEXT: mov s2, v0.s[2] +; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: mov v1.h[3], w9 +; CHECK-CVT-NEXT: csel w9, w12, w11, gt +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v1.h[4], w9 +; CHECK-CVT-NEXT: csel w9, w10, w11, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w10, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s0 +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v1.h[5], w9 +; CHECK-CVT-NEXT: csel w9, w10, w11, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w8, w12, w8, lt +; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v1.h[6], w9 +; CHECK-CVT-NEXT: csel w8, w8, w11, gt +; CHECK-CVT-NEXT: mov v1.h[7], w8 +; CHECK-CVT-NEXT: mov v0.16b, v1.16b ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i16: @@ -2921,69 +3103,119 @@ } define <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i19: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov w8, #-931135488 -; CHECK-NEXT: fcvt s18, h0 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: mov w8, #65472 -; CHECK-NEXT: fcvt s2, h1 -; CHECK-NEXT: mov h3, v1.h[1] -; CHECK-NEXT: movk w8, #18559, lsl #16 -; CHECK-NEXT: mov h5, v1.h[2] -; CHECK-NEXT: mov h1, v1.h[3] -; CHECK-NEXT: fmaxnm s6, s2, s4 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fmov s7, w8 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcmp s2, s2 -; CHECK-NEXT: fminnm s6, s6, s7 -; CHECK-NEXT: fmaxnm s16, s3, s4 -; CHECK-NEXT: fmaxnm s17, s5, s4 -; CHECK-NEXT: fcvtzs w8, s6 -; CHECK-NEXT: fminnm s2, s16, s7 -; CHECK-NEXT: mov h6, v0.h[1] -; CHECK-NEXT: fmaxnm s16, s1, s4 -; CHECK-NEXT: fminnm s17, s17, s7 -; CHECK-NEXT: csel w4, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: mov h3, v0.h[2] -; CHECK-NEXT: fcvtzs w8, s2 -; CHECK-NEXT: fcvt s2, h6 -; CHECK-NEXT: fminnm s6, s16, s7 -; CHECK-NEXT: fmaxnm s16, s18, s4 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fcvtzs w9, s17 -; CHECK-NEXT: csel w5, wzr, w8, vs -; CHECK-NEXT: fcmp s5, s5 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fmaxnm s5, s2, s4 -; CHECK-NEXT: fcvtzs w8, s6 -; CHECK-NEXT: fminnm s6, s16, s7 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: csel w6, wzr, w9, vs -; CHECK-NEXT: fcmp s1, s1 -; CHECK-NEXT: fmaxnm s1, s3, s4 -; CHECK-NEXT: fminnm s5, s5, s7 -; CHECK-NEXT: fcvtzs w9, s6 -; CHECK-NEXT: csel w7, wzr, w8, vs -; CHECK-NEXT: fcmp s18, s18 -; CHECK-NEXT: fmaxnm s4, s0, s4 -; CHECK-NEXT: fminnm s1, s1, s7 -; CHECK-NEXT: fcvtzs w8, s5 -; CHECK-NEXT: csel w0, wzr, w9, vs -; CHECK-NEXT: fcmp s2, s2 -; CHECK-NEXT: fminnm s2, s4, s7 -; CHECK-NEXT: fcvtzs w9, s1 -; CHECK-NEXT: csel w1, wzr, w8, vs -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs w8, s2 -; CHECK-NEXT: csel w2, wzr, w9, vs -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel w3, wzr, w8, vs -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v8f16_v8i19: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-CVT-NEXT: mov w8, #262143 +; CHECK-CVT-NEXT: mov w12, #-262144 +; CHECK-CVT-NEXT: fcvt s5, h0 +; CHECK-CVT-NEXT: mov h2, v1.h[1] +; CHECK-CVT-NEXT: fcvt s3, h1 +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvtzs w10, s5 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvtzs w9, s3 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: fcvtzs w11, s2 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: fcvtzs w13, s3 +; CHECK-CVT-NEXT: csel w4, w9, w12, gt +; CHECK-CVT-NEXT: mov h2, v0.h[1] +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: fcvtzs w9, s1 +; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: cmn w11, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: csel w5, w11, w12, gt +; CHECK-CVT-NEXT: cmp w13, w8 +; CHECK-CVT-NEXT: csel w11, w13, w8, lt +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: cmn w11, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: csel w6, w11, w12, gt +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: fcvtzs w11, s2 +; CHECK-CVT-NEXT: csel w7, w9, w12, gt +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w9, w10, w8, lt +; CHECK-CVT-NEXT: fcvtzs w10, s1 +; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: csel w0, w9, w12, gt +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w9, w11, w8, lt +; CHECK-CVT-NEXT: fcvtzs w11, s0 +; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: csel w1, w9, w12, gt +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w9, w10, w8, lt +; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: csel w2, w9, w12, gt +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w8, w11, w8, lt +; CHECK-CVT-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-CVT-NEXT: csel w3, w8, w12, gt +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v8f16_v8i19: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-FP16-NEXT: mov w8, #262143 +; CHECK-FP16-NEXT: mov w11, #-262144 +; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: fcvtzs w9, h1 +; CHECK-FP16-NEXT: mov h3, v1.h[2] +; CHECK-FP16-NEXT: mov h1, v1.h[3] +; CHECK-FP16-NEXT: cmp w9, w8 +; CHECK-FP16-NEXT: fcvtzs w10, h2 +; CHECK-FP16-NEXT: csel w9, w9, w8, lt +; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: fcvtzs w12, h3 +; CHECK-FP16-NEXT: csel w4, w9, w11, gt +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w9, w10, w8, lt +; CHECK-FP16-NEXT: fcvtzs w10, h1 +; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: csel w5, w9, w11, gt +; CHECK-FP16-NEXT: cmp w12, w8 +; CHECK-FP16-NEXT: csel w9, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h0 +; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: csel w6, w9, w11, gt +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w9, w10, w8, lt +; CHECK-FP16-NEXT: fcvtzs w10, h1 +; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: csel w7, w9, w11, gt +; CHECK-FP16-NEXT: cmp w12, w8 +; CHECK-FP16-NEXT: csel w9, w12, w8, lt +; CHECK-FP16-NEXT: fcvtzs w12, h2 +; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: csel w0, w9, w11, gt +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w9, w10, w8, lt +; CHECK-FP16-NEXT: fcvtzs w10, h0 +; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: csel w1, w9, w11, gt +; CHECK-FP16-NEXT: cmp w12, w8 +; CHECK-FP16-NEXT: csel w9, w12, w8, lt +; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: csel w2, w9, w11, gt +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w8, w10, w8, lt +; CHECK-FP16-NEXT: cmn w8, #64, lsl #12 // =262144 +; CHECK-FP16-NEXT: csel w3, w8, w11, gt +; CHECK-FP16-NEXT: ret %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x } @@ -3029,86 +3261,119 @@ } define <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i50: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-671088640 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov w9, #1476395007 -; CHECK-NEXT: mov h4, v0.h[1] -; CHECK-NEXT: mov x10, #562949953421311 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: fcvtzs x8, s3 -; CHECK-NEXT: mov x9, #-562949953421312 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcmp s3, s2 -; CHECK-NEXT: csel x8, x9, x8, lt -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: mov h3, v0.h[2] -; CHECK-NEXT: csel x0, xzr, x8, vs -; CHECK-NEXT: fcmp s4, s2 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s4, s1 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: mov h4, v0.h[3] -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fcmp s3, s2 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: csel x2, xzr, x8, vs -; CHECK-NEXT: fcmp s4, s2 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s4, s1 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: mov h4, v0.h[1] -; CHECK-NEXT: csel x3, xzr, x8, vs -; CHECK-NEXT: fcmp s3, s2 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: mov h3, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x4, xzr, x8, vs -; CHECK-NEXT: fcmp s4, s2 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s4, s1 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s4, s4 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: csel x5, xzr, x8, vs -; CHECK-NEXT: fcmp s3, s2 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: fcvtzs x11, s0 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s3, s3 -; CHECK-NEXT: csel x6, xzr, x8, vs -; CHECK-NEXT: fcmp s0, s2 -; CHECK-NEXT: csel x8, x9, x11, lt -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: csel x8, x10, x8, gt -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: csel x7, xzr, x8, vs -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v8f16_v8i50: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-CVT-NEXT: mov x8, #562949953421311 +; CHECK-CVT-NEXT: mov x12, #-562949953421312 +; CHECK-CVT-NEXT: fcvt s5, h0 +; CHECK-CVT-NEXT: mov h2, v1.h[1] +; CHECK-CVT-NEXT: fcvt s3, h1 +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvtzs x10, s5 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvtzs x9, s3 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: cmp x9, x8 +; CHECK-CVT-NEXT: fcvtzs x11, s2 +; CHECK-CVT-NEXT: csel x9, x9, x8, lt +; CHECK-CVT-NEXT: cmp x9, x12 +; CHECK-CVT-NEXT: fcvtzs x13, s3 +; CHECK-CVT-NEXT: csel x4, x9, x12, gt +; CHECK-CVT-NEXT: mov h2, v0.h[1] +; CHECK-CVT-NEXT: cmp x11, x8 +; CHECK-CVT-NEXT: fcvtzs x9, s1 +; CHECK-CVT-NEXT: csel x11, x11, x8, lt +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: cmp x11, x12 +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: csel x5, x11, x12, gt +; CHECK-CVT-NEXT: cmp x13, x8 +; CHECK-CVT-NEXT: csel x11, x13, x8, lt +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: cmp x11, x12 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: csel x6, x11, x12, gt +; CHECK-CVT-NEXT: cmp x9, x8 +; CHECK-CVT-NEXT: csel x9, x9, x8, lt +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: cmp x9, x12 +; CHECK-CVT-NEXT: fcvtzs x11, s2 +; CHECK-CVT-NEXT: csel x7, x9, x12, gt +; CHECK-CVT-NEXT: cmp x10, x8 +; CHECK-CVT-NEXT: csel x9, x10, x8, lt +; CHECK-CVT-NEXT: fcvtzs x10, s1 +; CHECK-CVT-NEXT: cmp x9, x12 +; CHECK-CVT-NEXT: csel x0, x9, x12, gt +; CHECK-CVT-NEXT: cmp x11, x8 +; CHECK-CVT-NEXT: csel x9, x11, x8, lt +; CHECK-CVT-NEXT: fcvtzs x11, s0 +; CHECK-CVT-NEXT: cmp x9, x12 +; CHECK-CVT-NEXT: csel x1, x9, x12, gt +; CHECK-CVT-NEXT: cmp x10, x8 +; CHECK-CVT-NEXT: csel x9, x10, x8, lt +; CHECK-CVT-NEXT: cmp x9, x12 +; CHECK-CVT-NEXT: csel x2, x9, x12, gt +; CHECK-CVT-NEXT: cmp x11, x8 +; CHECK-CVT-NEXT: csel x8, x11, x8, lt +; CHECK-CVT-NEXT: cmp x8, x12 +; CHECK-CVT-NEXT: csel x3, x8, x12, gt +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_signed_v8f16_v8i50: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-FP16-NEXT: mov x8, #562949953421311 +; CHECK-FP16-NEXT: mov x11, #-562949953421312 +; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: fcvtzs x9, h1 +; CHECK-FP16-NEXT: mov h3, v1.h[2] +; CHECK-FP16-NEXT: mov h1, v1.h[3] +; CHECK-FP16-NEXT: cmp x9, x8 +; CHECK-FP16-NEXT: fcvtzs x10, h2 +; CHECK-FP16-NEXT: csel x9, x9, x8, lt +; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: fcvtzs x12, h3 +; CHECK-FP16-NEXT: csel x4, x9, x11, gt +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: cmp x10, x8 +; CHECK-FP16-NEXT: csel x9, x10, x8, lt +; CHECK-FP16-NEXT: fcvtzs x10, h1 +; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: csel x5, x9, x11, gt +; CHECK-FP16-NEXT: cmp x12, x8 +; CHECK-FP16-NEXT: csel x9, x12, x8, lt +; CHECK-FP16-NEXT: fcvtzs x12, h0 +; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: csel x6, x9, x11, gt +; CHECK-FP16-NEXT: cmp x10, x8 +; CHECK-FP16-NEXT: csel x9, x10, x8, lt +; CHECK-FP16-NEXT: fcvtzs x10, h1 +; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x7, x9, x11, gt +; CHECK-FP16-NEXT: cmp x12, x8 +; CHECK-FP16-NEXT: csel x9, x12, x8, lt +; CHECK-FP16-NEXT: fcvtzs x12, h2 +; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x0, x9, x11, gt +; CHECK-FP16-NEXT: cmp x10, x8 +; CHECK-FP16-NEXT: csel x9, x10, x8, lt +; CHECK-FP16-NEXT: fcvtzs x10, h0 +; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x1, x9, x11, gt +; CHECK-FP16-NEXT: cmp x12, x8 +; CHECK-FP16-NEXT: csel x9, x12, x8, lt +; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x2, x9, x11, gt +; CHECK-FP16-NEXT: cmp x10, x8 +; CHECK-FP16-NEXT: csel x8, x10, x8, lt +; CHECK-FP16-NEXT: cmp x8, x11 +; CHECK-FP16-NEXT: csel x3, x8, x11, gt +; CHECK-FP16-NEXT: ret %x = call <8 x i50> @llvm.fptosi.sat.v8f16.v8i50(<8 x half> %f) ret <8 x i50> %x } diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -20,11 +20,9 @@ define i1 @test_unsigned_i1_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i1_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s1, #1.00000000 -; CHECK-NEXT: fminnm s0, s0, s1 ; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: cmp w8, #1 +; CHECK-NEXT: csinc w8, w8, wzr, lo ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %x = call i1 @llvm.fptoui.sat.i1.f32(float %f) @@ -34,12 +32,10 @@ define i8 @test_unsigned_i8_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i8_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #1132396544 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fminnm s0, s0, s1 -; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: fcvtzu w9, s0 +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: csel w0, w9, w8, lo ; CHECK-NEXT: ret %x = call i8 @llvm.fptoui.sat.i8.f32(float %f) ret i8 %x @@ -48,13 +44,10 @@ define i13 @test_unsigned_i13_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i13_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: movk w8, #17919, lsl #16 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fminnm s0, s0, s1 -; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: mov w9, #8191 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %x = call i13 @llvm.fptoui.sat.i13.f32(float %f) ret i13 %x @@ -63,13 +56,10 @@ define i16 @test_unsigned_i16_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i16_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #65280 -; CHECK-NEXT: movk w8, #18303, lsl #16 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fminnm s0, s0, s1 -; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: mov w9, #65535 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %x = call i16 @llvm.fptoui.sat.i16.f32(float %f) ret i16 %x @@ -78,13 +68,10 @@ define i19 @test_unsigned_i19_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i19_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: movk w8, #18687, lsl #16 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fminnm s0, s0, s1 -; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: mov w9, #524287 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %x = call i19 @llvm.fptoui.sat.i19.f32(float %f) ret i19 %x @@ -102,14 +89,10 @@ define i50 @test_unsigned_i50_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i50_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1484783615 -; CHECK-NEXT: fcvtzu x9, s0 -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: csel x8, xzr, x9, lt +; CHECK-NEXT: fcvtzu x8, s0 ; CHECK-NEXT: mov x9, #1125899906842623 -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: csel x0, x9, x8, gt +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x0, x8, x9, lo ; CHECK-NEXT: ret %x = call i50 @llvm.fptoui.sat.i50.f32(float %f) ret i50 %x @@ -187,11 +170,9 @@ define i1 @test_unsigned_i1_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i1_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d1, #1.00000000 -; CHECK-NEXT: fminnm d0, d0, d1 ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: cmp w8, #1 +; CHECK-NEXT: csinc w8, w8, wzr, lo ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %x = call i1 @llvm.fptoui.sat.i1.f64(double %f) @@ -201,13 +182,10 @@ define i8 @test_unsigned_i8_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i8_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov x8, #246290604621824 -; CHECK-NEXT: movk x8, #16495, lsl #48 -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fminnm d0, d0, d1 -; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: fcvtzu w9, d0 +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: csel w0, w9, w8, lo ; CHECK-NEXT: ret %x = call i8 @llvm.fptoui.sat.i8.f64(double %f) ret i8 %x @@ -216,13 +194,10 @@ define i13 @test_unsigned_i13_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i13_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov x8, #280375465082880 -; CHECK-NEXT: movk x8, #16575, lsl #48 -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fminnm d0, d0, d1 -; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov w9, #8191 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %x = call i13 @llvm.fptoui.sat.i13.f64(double %f) ret i13 %x @@ -231,13 +206,10 @@ define i16 @test_unsigned_i16_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i16_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov x8, #281337537757184 -; CHECK-NEXT: movk x8, #16623, lsl #48 -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fminnm d0, d0, d1 -; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov w9, #65535 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %x = call i16 @llvm.fptoui.sat.i16.f64(double %f) ret i16 %x @@ -246,13 +218,10 @@ define i19 @test_unsigned_i19_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i19_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov x8, #281457796841472 -; CHECK-NEXT: movk x8, #16671, lsl #48 -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fminnm d0, d0, d1 -; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov w9, #524287 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %x = call i19 @llvm.fptoui.sat.i19.f64(double %f) ret i19 %x @@ -270,13 +239,10 @@ define i50 @test_unsigned_i50_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i50_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov x8, #-8 -; CHECK-NEXT: movk x8, #17167, lsl #48 -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fminnm d0, d0, d1 -; CHECK-NEXT: fcvtzu x0, d0 +; CHECK-NEXT: fcvtzu x8, d0 +; CHECK-NEXT: mov x9, #1125899906842623 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x0, x8, x9, lo ; CHECK-NEXT: ret %x = call i50 @llvm.fptoui.sat.i50.f64(double %f) ret i50 %x @@ -352,79 +318,106 @@ declare i128 @llvm.fptoui.sat.i128.f16(half) define i1 @test_unsigned_i1_f16(half %f) nounwind { -; CHECK-LABEL: test_unsigned_i1_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s1, #1.00000000 -; CHECK-NEXT: fminnm s0, s0, s1 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: and w0, w8, #0x1 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_i1_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu w8, s0 +; CHECK-CVT-NEXT: cmp w8, #1 +; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo +; CHECK-CVT-NEXT: and w0, w8, #0x1 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_i1_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w8, h0 +; CHECK-FP16-NEXT: cmp w8, #1 +; CHECK-FP16-NEXT: csinc w8, w8, wzr, lo +; CHECK-FP16-NEXT: and w0, w8, #0x1 +; CHECK-FP16-NEXT: ret %x = call i1 @llvm.fptoui.sat.i1.f16(half %f) ret i1 %x } define i8 @test_unsigned_i8_f16(half %f) nounwind { -; CHECK-LABEL: test_unsigned_i8_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov w8, #1132396544 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fminnm s0, s0, s1 -; CHECK-NEXT: fcvtzu w0, s0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_i8_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov w8, #255 +; CHECK-CVT-NEXT: fcvtzu w9, s0 +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w0, w9, w8, lo +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_i8_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w9, h0 +; CHECK-FP16-NEXT: mov w8, #255 +; CHECK-FP16-NEXT: cmp w9, #255 +; CHECK-FP16-NEXT: csel w0, w9, w8, lo +; CHECK-FP16-NEXT: ret %x = call i8 @llvm.fptoui.sat.i8.f16(half %f) ret i8 %x } define i13 @test_unsigned_i13_f16(half %f) nounwind { -; CHECK-LABEL: test_unsigned_i13_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: movk w8, #17919, lsl #16 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fminnm s0, s0, s1 -; CHECK-NEXT: fcvtzu w0, s0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_i13_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov w9, #8191 +; CHECK-CVT-NEXT: fcvtzu w8, s0 +; CHECK-CVT-NEXT: cmp w8, w9 +; CHECK-CVT-NEXT: csel w0, w8, w9, lo +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_i13_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w8, h0 +; CHECK-FP16-NEXT: mov w9, #8191 +; CHECK-FP16-NEXT: cmp w8, w9 +; CHECK-FP16-NEXT: csel w0, w8, w9, lo +; CHECK-FP16-NEXT: ret %x = call i13 @llvm.fptoui.sat.i13.f16(half %f) ret i13 %x } define i16 @test_unsigned_i16_f16(half %f) nounwind { -; CHECK-LABEL: test_unsigned_i16_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov w8, #65280 -; CHECK-NEXT: movk w8, #18303, lsl #16 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fminnm s0, s0, s1 -; CHECK-NEXT: fcvtzu w0, s0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_i16_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov w9, #65535 +; CHECK-CVT-NEXT: fcvtzu w8, s0 +; CHECK-CVT-NEXT: cmp w8, w9 +; CHECK-CVT-NEXT: csel w0, w8, w9, lo +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_i16_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w8, h0 +; CHECK-FP16-NEXT: mov w9, #65535 +; CHECK-FP16-NEXT: cmp w8, w9 +; CHECK-FP16-NEXT: csel w0, w8, w9, lo +; CHECK-FP16-NEXT: ret %x = call i16 @llvm.fptoui.sat.i16.f16(half %f) ret i16 %x } define i19 @test_unsigned_i19_f16(half %f) nounwind { -; CHECK-LABEL: test_unsigned_i19_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: movk w8, #18687, lsl #16 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fminnm s0, s0, s1 -; CHECK-NEXT: fcvtzu w0, s0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_i19_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov w9, #524287 +; CHECK-CVT-NEXT: fcvtzu w8, s0 +; CHECK-CVT-NEXT: cmp w8, w9 +; CHECK-CVT-NEXT: csel w0, w8, w9, lo +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_i19_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w8, h0 +; CHECK-FP16-NEXT: mov w9, #524287 +; CHECK-FP16-NEXT: cmp w8, w9 +; CHECK-FP16-NEXT: csel w0, w8, w9, lo +; CHECK-FP16-NEXT: ret %x = call i19 @llvm.fptoui.sat.i19.f16(half %f) ret i19 %x } @@ -445,18 +438,22 @@ } define i50 @test_unsigned_i50_f16(half %f) nounwind { -; CHECK-LABEL: test_unsigned_i50_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov w8, #1484783615 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fcvtzu x9, s0 -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: csel x8, xzr, x9, lt -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: mov x9, #1125899906842623 -; CHECK-NEXT: csel x0, x9, x8, gt -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_i50_f16: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov x9, #1125899906842623 +; CHECK-CVT-NEXT: fcvtzu x8, s0 +; CHECK-CVT-NEXT: cmp x8, x9 +; CHECK-CVT-NEXT: csel x0, x8, x9, lo +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_i50_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu x8, h0 +; CHECK-FP16-NEXT: mov x9, #1125899906842623 +; CHECK-FP16-NEXT: cmp x8, x9 +; CHECK-FP16-NEXT: csel x0, x8, x9, lo +; CHECK-FP16-NEXT: ret %x = call i50 @llvm.fptoui.sat.i50.f16(half %f) ret i50 %x } diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -750,17 +750,15 @@ define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmov s3, #1.00000000 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmaxnm s1, s2, s1 -; CHECK-NEXT: fminnm s0, s0, s3 -; CHECK-NEXT: fminnm s1, s1, s3 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu w9, s0 ; CHECK-NEXT: fcvtzu w8, s1 +; CHECK-NEXT: cmp w8, #1 +; CHECK-NEXT: csinc w8, w8, wzr, lo +; CHECK-NEXT: cmp w9, #1 +; CHECK-NEXT: csinc w9, w9, wzr, lo +; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -771,19 +769,17 @@ define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #1132396544 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmaxnm s1, s2, s1 -; CHECK-NEXT: fminnm s0, s0, s3 -; CHECK-NEXT: fminnm s1, s1, s3 -; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu w10, s0 +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: fcvtzu w9, s1 +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w10, #255 +; CHECK-NEXT: csel w8, w10, w8, lo ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptoui.sat.v2f32.v2i8(<2 x float> %f) @@ -793,19 +789,16 @@ define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: movk w8, #17919, lsl #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: fmaxnm s1, s2, s1 -; CHECK-NEXT: fminnm s0, s0, s3 -; CHECK-NEXT: fminnm s1, s1, s3 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu w9, s0 +; CHECK-NEXT: mov w10, #8191 ; CHECK-NEXT: fcvtzu w8, s1 +; CHECK-NEXT: cmp w8, w10 +; CHECK-NEXT: csel w8, w8, w10, lo +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: csel w9, w9, w10, lo +; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -816,19 +809,16 @@ define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #65280 -; CHECK-NEXT: movk w8, #18303, lsl #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: fmaxnm s1, s2, s1 -; CHECK-NEXT: fminnm s0, s0, s3 -; CHECK-NEXT: fminnm s1, s1, s3 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu w9, s0 +; CHECK-NEXT: mov w10, #65535 ; CHECK-NEXT: fcvtzu w8, s1 +; CHECK-NEXT: cmp w8, w10 +; CHECK-NEXT: csel w8, w8, w10, lo +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: csel w9, w9, w10, lo +; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -839,19 +829,16 @@ define <2 x i19> @test_unsigned_v2f32_v2i19(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: movk w8, #18687, lsl #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: fmaxnm s1, s2, s1 -; CHECK-NEXT: fminnm s0, s0, s3 -; CHECK-NEXT: fminnm s1, s1, s3 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu w9, s0 +; CHECK-NEXT: mov w10, #524287 ; CHECK-NEXT: fcvtzu w8, s1 +; CHECK-NEXT: cmp w8, w10 +; CHECK-NEXT: csel w8, w8, w10, lo +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: csel w9, w9, w10, lo +; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -873,19 +860,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: mov w8, #1484783615 -; CHECK-NEXT: mov x9, #1125899906842623 -; CHECK-NEXT: fcvtzu x10, s0 -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fcvtzu x9, s0 +; CHECK-NEXT: mov x10, #1125899906842623 ; CHECK-NEXT: fcvtzu x8, s1 -; CHECK-NEXT: fcmp s1, #0.0 -; CHECK-NEXT: csel x8, xzr, x8, lt -; CHECK-NEXT: fcmp s1, s2 -; CHECK-NEXT: csel x8, x9, x8, gt -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: csel x10, xzr, x10, lt -; CHECK-NEXT: fcmp s0, s2 -; CHECK-NEXT: csel x9, x9, x10, gt +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: cmp x9, x10 +; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret @@ -1026,28 +1007,26 @@ define <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmov s4, #1.00000000 -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: fmaxnm s3, s0, s1 -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu w9, s0 +; CHECK-NEXT: mov s2, v0.s[2] +; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: cmp w8, #1 +; CHECK-NEXT: csinc w8, w8, wzr, lo +; CHECK-NEXT: cmp w9, #1 +; CHECK-NEXT: csinc w9, w9, wzr, lo +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: fcvtzu w9, s2 +; CHECK-NEXT: mov v1.h[1], w8 +; CHECK-NEXT: cmp w9, #1 +; CHECK-NEXT: csinc w8, w9, wzr, lo +; CHECK-NEXT: fcvtzu w9, s0 +; CHECK-NEXT: mov v1.h[2], w8 +; CHECK-NEXT: cmp w9, #1 +; CHECK-NEXT: csinc w8, w9, wzr, lo +; CHECK-NEXT: mov v1.h[3], w8 +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %x = call <4 x i1> @llvm.fptoui.sat.v4f32.v4i1(<4 x float> %f) ret <4 x i1> %x @@ -1056,29 +1035,27 @@ define <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #1132396544 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s3, s0, s1 -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu w10, s0 +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov s2, v0.s[2] +; CHECK-NEXT: mov s0, v0.s[3] +; CHECK-NEXT: fcvtzu w9, s1 +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w10, #255 +; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: fcvtzu w10, s2 +; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: cmp w10, #255 +; CHECK-NEXT: csel w9, w10, w8, lo +; CHECK-NEXT: fcvtzu w10, s0 +; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: cmp w10, #255 +; CHECK-NEXT: csel w8, w10, w8, lo +; CHECK-NEXT: mov v1.h[3], w8 +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %x = call <4 x i8> @llvm.fptoui.sat.v4f32.v4i8(<4 x float> %f) ret <4 x i8> %x @@ -1087,30 +1064,27 @@ define <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: movk w8, #17919, lsl #16 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: fmaxnm s3, s0, s1 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu w10, s0 +; CHECK-NEXT: mov w8, #8191 +; CHECK-NEXT: mov s2, v0.s[2] +; CHECK-NEXT: mov s0, v0.s[3] +; CHECK-NEXT: fcvtzu w9, s1 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: fcvtzu w10, s2 +; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w9, w10, w8, lo +; CHECK-NEXT: fcvtzu w10, s0 +; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w8, w10, w8, lo +; CHECK-NEXT: mov v1.h[3], w8 +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %x = call <4 x i13> @llvm.fptoui.sat.v4f32.v4i13(<4 x float> %f) ret <4 x i13> %x @@ -1119,30 +1093,27 @@ define <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #65280 -; CHECK-NEXT: movk w8, #18303, lsl #16 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: fmaxnm s3, s0, s1 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu w10, s0 +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov s2, v0.s[2] +; CHECK-NEXT: mov s0, v0.s[3] +; CHECK-NEXT: fcvtzu w9, s1 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: fcvtzu w10, s2 +; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w9, w10, w8, lo +; CHECK-NEXT: fcvtzu w10, s0 +; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w8, w10, w8, lo +; CHECK-NEXT: mov v1.h[3], w8 +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %x = call <4 x i16> @llvm.fptoui.sat.v4f32.v4i16(<4 x float> %f) ret <4 x i16> %x @@ -1151,29 +1122,27 @@ define <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: movk w8, #18687, lsl #16 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: fmaxnm s3, s0, s1 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fminnm s3, s3, s4 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov s3, v0.s[3] -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu w10, s0 +; CHECK-NEXT: mov w8, #524287 +; CHECK-NEXT: mov s2, v0.s[2] +; CHECK-NEXT: mov s0, v0.s[3] +; CHECK-NEXT: fcvtzu w9, s1 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: fcvtzu w10, s2 +; CHECK-NEXT: mov v1.s[1], w9 +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w9, w10, w8, lo +; CHECK-NEXT: fcvtzu w10, s0 +; CHECK-NEXT: mov v1.s[2], w9 +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w8, w10, w8, lo +; CHECK-NEXT: mov v1.s[3], w8 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret %x = call <4 x i19> @llvm.fptoui.sat.v4f32.v4i19(<4 x float> %f) ret <4 x i19> %x @@ -1191,32 +1160,22 @@ define <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i50: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1484783615 -; CHECK-NEXT: fcvtzu x9, s0 -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: csel x8, xzr, x9, lt -; CHECK-NEXT: mov x9, #1125899906842623 +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov x8, #1125899906842623 +; CHECK-NEXT: mov s3, v0.s[1] +; CHECK-NEXT: fcvtzu x11, s0 +; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: fcvtzu x9, s1 +; CHECK-NEXT: fcvtzu x12, s3 +; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: fcvtzu x10, s2 -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: csel x0, x9, x8, gt -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s2, s1 -; CHECK-NEXT: fcvtzu x10, s0 -; CHECK-NEXT: mov s2, v0.s[1] -; CHECK-NEXT: csel x1, x9, x8, gt -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: csel x2, x9, x8, gt -; CHECK-NEXT: fcvtzu x8, s2 -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: csel x8, xzr, x8, lt -; CHECK-NEXT: fcmp s2, s1 -; CHECK-NEXT: csel x3, x9, x8, gt +; CHECK-NEXT: csel x2, x9, x8, lo +; CHECK-NEXT: cmp x10, x8 +; CHECK-NEXT: csel x3, x10, x8, lo +; CHECK-NEXT: cmp x11, x8 +; CHECK-NEXT: csel x0, x11, x8, lo +; CHECK-NEXT: cmp x12, x8 +; CHECK-NEXT: csel x1, x12, x8, lo ; CHECK-NEXT: ret %x = call <4 x i50> @llvm.fptoui.sat.v4f32.v4i50(<4 x float> %f) ret <4 x i50> %x @@ -1424,16 +1383,14 @@ define <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: fmov d3, #1.00000000 -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmaxnm d1, d2, d1 -; CHECK-NEXT: fminnm d0, d0, d3 -; CHECK-NEXT: fminnm d1, d1, d3 -; CHECK-NEXT: fcvtzu w8, d0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w9, d0 ; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: cmp w8, #1 +; CHECK-NEXT: csinc w8, w8, wzr, lo +; CHECK-NEXT: cmp w9, #1 +; CHECK-NEXT: csinc w9, w9, wzr, lo +; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1444,19 +1401,16 @@ define <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov x8, #246290604621824 -; CHECK-NEXT: movk x8, #16495, lsl #48 -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d3, x8 -; CHECK-NEXT: fmaxnm d1, d2, d1 -; CHECK-NEXT: fminnm d0, d0, d3 -; CHECK-NEXT: fminnm d1, d1, d3 -; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w10, d0 +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w10, #255 +; CHECK-NEXT: csel w8, w10, w8, lo ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptoui.sat.v2f64.v2i8(<2 x double> %f) @@ -1466,18 +1420,15 @@ define <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov x8, #280375465082880 -; CHECK-NEXT: movk x8, #16575, lsl #48 -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d3, x8 -; CHECK-NEXT: fmaxnm d1, d2, d1 -; CHECK-NEXT: fminnm d0, d0, d3 -; CHECK-NEXT: fminnm d1, d1, d3 -; CHECK-NEXT: fcvtzu w8, d0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w9, d0 +; CHECK-NEXT: mov w10, #8191 ; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: cmp w8, w10 +; CHECK-NEXT: csel w8, w8, w10, lo +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: csel w9, w9, w10, lo +; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1488,18 +1439,15 @@ define <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov x8, #281337537757184 -; CHECK-NEXT: movk x8, #16623, lsl #48 -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d3, x8 -; CHECK-NEXT: fmaxnm d1, d2, d1 -; CHECK-NEXT: fminnm d0, d0, d3 -; CHECK-NEXT: fminnm d1, d1, d3 -; CHECK-NEXT: fcvtzu w8, d0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w9, d0 +; CHECK-NEXT: mov w10, #65535 ; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: cmp w8, w10 +; CHECK-NEXT: csel w8, w8, w10, lo +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: csel w9, w9, w10, lo +; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1510,18 +1458,15 @@ define <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov x8, #281457796841472 -; CHECK-NEXT: movk x8, #16671, lsl #48 -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d3, x8 -; CHECK-NEXT: fmaxnm d1, d2, d1 -; CHECK-NEXT: fminnm d0, d0, d3 -; CHECK-NEXT: fminnm d1, d1, d3 -; CHECK-NEXT: fcvtzu w8, d0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w9, d0 +; CHECK-NEXT: mov w10, #524287 ; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: cmp w8, w10 +; CHECK-NEXT: csel w8, w8, w10, lo +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: csel w9, w9, w10, lo +; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1546,18 +1491,15 @@ define <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i50: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov x8, #-8 -; CHECK-NEXT: movk x8, #17167, lsl #48 -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: fmaxnm d0, d0, d1 -; CHECK-NEXT: fmov d3, x8 -; CHECK-NEXT: fmaxnm d1, d2, d1 -; CHECK-NEXT: fminnm d0, d0, d3 -; CHECK-NEXT: fminnm d1, d1, d3 -; CHECK-NEXT: fcvtzu x8, d0 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu x9, d0 +; CHECK-NEXT: mov x10, #1125899906842623 ; CHECK-NEXT: fcvtzu x8, d1 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: cmp x9, x10 +; CHECK-NEXT: csel x9, x9, x10, lo +; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %x = call <2 x i50> @llvm.fptoui.sat.v2f64.v2i50(<2 x double> %f) @@ -1688,109 +1630,179 @@ declare <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half>) define <4 x i1> @test_unsigned_v4f16_v4i1(<4 x half> %f) { -; CHECK-LABEL: test_unsigned_v4f16_v4i1: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s2, h0 -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov h5, v0.h[2] -; CHECK-NEXT: fmov s4, #1.00000000 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fmaxnm s3, s3, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: fminnm s2, s3, s4 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i1: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov h2, v0.h[3] +; CHECK-CVT-NEXT: fcvtzu w8, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: cmp w8, #1 +; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo +; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo +; CHECK-CVT-NEXT: fmov s0, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: fcvt s1, h2 +; CHECK-CVT-NEXT: mov v0.h[1], w8 +; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov v0.h[2], w8 +; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo +; CHECK-CVT-NEXT: mov v0.h[3], w8 +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i1: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzu w9, h0 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu w8, h1 +; CHECK-FP16-NEXT: cmp w8, #1 +; CHECK-FP16-NEXT: csinc w8, w8, wzr, lo +; CHECK-FP16-NEXT: cmp w9, #1 +; CHECK-FP16-NEXT: csinc w9, w9, wzr, lo +; CHECK-FP16-NEXT: fmov s1, w9 +; CHECK-FP16-NEXT: fcvtzu w9, h2 +; CHECK-FP16-NEXT: mov v1.h[1], w8 +; CHECK-FP16-NEXT: cmp w9, #1 +; CHECK-FP16-NEXT: csinc w8, w9, wzr, lo +; CHECK-FP16-NEXT: fcvtzu w9, h0 +; CHECK-FP16-NEXT: mov v1.h[2], w8 +; CHECK-FP16-NEXT: cmp w9, #1 +; CHECK-FP16-NEXT: csinc w8, w9, wzr, lo +; CHECK-FP16-NEXT: mov v1.h[3], w8 +; CHECK-FP16-NEXT: fmov d0, d1 +; CHECK-FP16-NEXT: ret %x = call <4 x i1> @llvm.fptoui.sat.v4f16.v4i1(<4 x half> %f) ret <4 x i1> %x } define <4 x i8> @test_unsigned_v4f16_v4i8(<4 x half> %f) { -; CHECK-LABEL: test_unsigned_v4f16_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s2, h0 -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov w8, #1132396544 -; CHECK-NEXT: mov h5, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fmaxnm s3, s3, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: fminnm s2, s3, s4 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i8: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov w8, #255 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov h2, v0.h[3] +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: fmov s0, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvt s1, h2 +; CHECK-CVT-NEXT: mov v0.h[1], w9 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov v0.h[2], w9 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo +; CHECK-CVT-NEXT: mov v0.h[3], w8 +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i8: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzu w10, h0 +; CHECK-FP16-NEXT: mov w8, #255 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu w9, h1 +; CHECK-FP16-NEXT: cmp w9, #255 +; CHECK-FP16-NEXT: csel w9, w9, w8, lo +; CHECK-FP16-NEXT: cmp w10, #255 +; CHECK-FP16-NEXT: csel w10, w10, w8, lo +; CHECK-FP16-NEXT: fmov s1, w10 +; CHECK-FP16-NEXT: fcvtzu w10, h2 +; CHECK-FP16-NEXT: mov v1.h[1], w9 +; CHECK-FP16-NEXT: cmp w10, #255 +; CHECK-FP16-NEXT: csel w9, w10, w8, lo +; CHECK-FP16-NEXT: fcvtzu w10, h0 +; CHECK-FP16-NEXT: mov v1.h[2], w9 +; CHECK-FP16-NEXT: cmp w10, #255 +; CHECK-FP16-NEXT: csel w8, w10, w8, lo +; CHECK-FP16-NEXT: mov v1.h[3], w8 +; CHECK-FP16-NEXT: fmov d0, d1 +; CHECK-FP16-NEXT: ret %x = call <4 x i8> @llvm.fptoui.sat.v4f16.v4i8(<4 x half> %f) ret <4 x i8> %x } define <4 x i13> @test_unsigned_v4f16_v4i13(<4 x half> %f) { -; CHECK-LABEL: test_unsigned_v4f16_v4i13: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s2, h0 -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: movk w8, #17919, lsl #16 -; CHECK-NEXT: mov h5, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fmaxnm s3, s3, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: fminnm s2, s3, s4 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i13: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov w8, #8191 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov h2, v0.h[3] +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: fmov s0, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvt s1, h2 +; CHECK-CVT-NEXT: mov v0.h[1], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov v0.h[2], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo +; CHECK-CVT-NEXT: mov v0.h[3], w8 +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i13: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzu w10, h0 +; CHECK-FP16-NEXT: mov w8, #8191 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu w9, h1 +; CHECK-FP16-NEXT: cmp w9, w8 +; CHECK-FP16-NEXT: csel w9, w9, w8, lo +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w10, w10, w8, lo +; CHECK-FP16-NEXT: fmov s1, w10 +; CHECK-FP16-NEXT: fcvtzu w10, h2 +; CHECK-FP16-NEXT: mov v1.h[1], w9 +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w9, w10, w8, lo +; CHECK-FP16-NEXT: fcvtzu w10, h0 +; CHECK-FP16-NEXT: mov v1.h[2], w9 +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w8, w10, w8, lo +; CHECK-FP16-NEXT: mov v1.h[3], w8 +; CHECK-FP16-NEXT: fmov d0, d1 +; CHECK-FP16-NEXT: ret %x = call <4 x i13> @llvm.fptoui.sat.v4f16.v4i13(<4 x half> %f) ret <4 x i13> %x } @@ -1798,29 +1810,26 @@ define <4 x i16> @test_unsigned_v4f16_v4i16(<4 x half> %f) { ; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i16: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: movi d1, #0000000000000000 -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #65280 -; CHECK-CVT-NEXT: movk w8, #18303, lsl #16 -; CHECK-CVT-NEXT: fmaxnm s2, s0, s1 -; CHECK-CVT-NEXT: mov s3, v0.s[1] -; CHECK-CVT-NEXT: fmov s4, w8 -; CHECK-CVT-NEXT: mov s5, v0.s[2] -; CHECK-CVT-NEXT: fminnm s2, s2, s4 -; CHECK-CVT-NEXT: fmaxnm s3, s3, s1 -; CHECK-CVT-NEXT: fmaxnm s5, s5, s1 -; CHECK-CVT-NEXT: fcvtzu w8, s2 -; CHECK-CVT-NEXT: fminnm s2, s3, s4 -; CHECK-CVT-NEXT: mov s3, v0.s[3] -; CHECK-CVT-NEXT: fmov s0, w8 -; CHECK-CVT-NEXT: fcvtzu w9, s2 -; CHECK-CVT-NEXT: fminnm s2, s5, s4 -; CHECK-CVT-NEXT: fmaxnm s1, s3, s1 +; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-NEXT: mov w8, #65535 +; CHECK-CVT-NEXT: mov s0, v1.s[1] +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w9, s0 +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: fmov s0, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s2 ; CHECK-CVT-NEXT: mov v0.h[1], w9 -; CHECK-CVT-NEXT: fcvtzu w8, s2 -; CHECK-CVT-NEXT: fminnm s1, s1, s4 -; CHECK-CVT-NEXT: mov v0.h[2], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s1 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov v0.h[2], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo ; CHECK-CVT-NEXT: mov v0.h[3], w8 ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-CVT-NEXT: ret @@ -1834,37 +1843,60 @@ } define <4 x i19> @test_unsigned_v4f16_v4i19(<4 x half> %f) { -; CHECK-LABEL: test_unsigned_v4f16_v4i19: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcvt s2, h0 -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: movk w8, #18687, lsl #16 -; CHECK-NEXT: mov h5, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmaxnm s2, s2, s1 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fminnm s2, s2, s4 -; CHECK-NEXT: fmaxnm s3, s3, s1 -; CHECK-NEXT: fmaxnm s5, s5, s1 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: fminnm s2, s3, s4 -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w9, s2 -; CHECK-NEXT: fmaxnm s1, s3, s1 -; CHECK-NEXT: fminnm s2, s5, s4 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: fminnm s1, s1, s4 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzu w8, s1 -; CHECK-NEXT: mov v0.s[3], w8 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i19: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov w8, #524287 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov h2, v0.h[3] +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: fmov s0, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvt s1, h2 +; CHECK-CVT-NEXT: mov v0.s[1], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov v0.s[2], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo +; CHECK-CVT-NEXT: mov v0.s[3], w8 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i19: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzu w10, h0 +; CHECK-FP16-NEXT: mov w8, #524287 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu w9, h1 +; CHECK-FP16-NEXT: cmp w9, w8 +; CHECK-FP16-NEXT: csel w9, w9, w8, lo +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w10, w10, w8, lo +; CHECK-FP16-NEXT: fmov s1, w10 +; CHECK-FP16-NEXT: fcvtzu w10, h2 +; CHECK-FP16-NEXT: mov v1.s[1], w9 +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w9, w10, w8, lo +; CHECK-FP16-NEXT: fcvtzu w10, h0 +; CHECK-FP16-NEXT: mov v1.s[2], w9 +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w8, w10, w8, lo +; CHECK-FP16-NEXT: mov v1.s[3], w8 +; CHECK-FP16-NEXT: mov v0.16b, v1.16b +; CHECK-FP16-NEXT: ret %x = call <4 x i19> @llvm.fptoui.sat.v4f16.v4i19(<4 x half> %f) ret <4 x i19> %x } @@ -1897,40 +1929,51 @@ } define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) { -; CHECK-LABEL: test_unsigned_v4f16_v4i50: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: mov w8, #1484783615 -; CHECK-NEXT: mov h2, v0.h[1] -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: fcvtzu x9, s1 -; CHECK-NEXT: fcmp s1, #0.0 -; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: csel x8, xzr, x9, lt -; CHECK-NEXT: fcmp s1, s3 -; CHECK-NEXT: mov h1, v0.h[2] -; CHECK-NEXT: mov x9, #1125899906842623 -; CHECK-NEXT: fcvtzu x10, s2 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x0, x9, x8, gt -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s2, s3 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvtzu x10, s1 -; CHECK-NEXT: csel x1, x9, x8, gt -; CHECK-NEXT: fcmp s1, #0.0 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s1, s3 -; CHECK-NEXT: csel x2, x9, x8, gt -; CHECK-NEXT: fcvtzu x8, s0 -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: csel x8, xzr, x8, lt -; CHECK-NEXT: fcmp s0, s3 -; CHECK-NEXT: csel x3, x9, x8, gt -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i50: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v0.h[3] +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov x8, #1125899906842623 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvtzu x9, s0 +; CHECK-CVT-NEXT: fcvtzu x10, s1 +; CHECK-CVT-NEXT: fcvtzu x11, s2 +; CHECK-CVT-NEXT: cmp x9, x8 +; CHECK-CVT-NEXT: fcvtzu x12, s3 +; CHECK-CVT-NEXT: csel x0, x9, x8, lo +; CHECK-CVT-NEXT: cmp x10, x8 +; CHECK-CVT-NEXT: csel x1, x10, x8, lo +; CHECK-CVT-NEXT: cmp x11, x8 +; CHECK-CVT-NEXT: csel x2, x11, x8, lo +; CHECK-CVT-NEXT: cmp x12, x8 +; CHECK-CVT-NEXT: csel x3, x12, x8, lo +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i50: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h3, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu x9, h0 +; CHECK-FP16-NEXT: mov x8, #1125899906842623 +; CHECK-FP16-NEXT: fcvtzu x10, h1 +; CHECK-FP16-NEXT: fcvtzu x11, h2 +; CHECK-FP16-NEXT: cmp x9, x8 +; CHECK-FP16-NEXT: fcvtzu x12, h3 +; CHECK-FP16-NEXT: csel x0, x9, x8, lo +; CHECK-FP16-NEXT: cmp x10, x8 +; CHECK-FP16-NEXT: csel x1, x10, x8, lo +; CHECK-FP16-NEXT: cmp x11, x8 +; CHECK-FP16-NEXT: csel x2, x11, x8, lo +; CHECK-FP16-NEXT: cmp x12, x8 +; CHECK-FP16-NEXT: csel x3, x12, x8, lo +; CHECK-FP16-NEXT: ret %x = call <4 x i50> @llvm.fptoui.sat.v4f16.v4i50(<4 x half> %f) ret <4 x i50> %x } @@ -2162,59 +2205,101 @@ declare <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half>) define <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) { -; CHECK-LABEL: test_unsigned_v8f16_v8i1: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: fmov s5, #1.00000000 -; CHECK-NEXT: mov h6, v0.h[3] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fmaxnm s3, s3, s2 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmaxnm s1, s1, s2 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fmaxnm s4, s4, s2 -; CHECK-NEXT: fmaxnm s6, s6, s2 -; CHECK-NEXT: fminnm s7, s1, s5 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov h3, v0.h[4] -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fminnm s6, s6, s5 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fcvtzu w8, s7 -; CHECK-NEXT: mov h7, v0.h[5] -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: mov v1.b[1], w8 -; CHECK-NEXT: fcvtzu w8, s4 -; CHECK-NEXT: fcvt s4, h7 -; CHECK-NEXT: fmaxnm s3, s3, s2 -; CHECK-NEXT: mov h7, v0.h[6] -; CHECK-NEXT: mov h0, v0.h[7] -; CHECK-NEXT: mov v1.b[2], w8 -; CHECK-NEXT: fcvtzu w8, s6 -; CHECK-NEXT: fmaxnm s4, s4, s2 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fcvt s6, h7 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov v1.b[3], w8 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: fminnm s3, s4, s5 -; CHECK-NEXT: fmaxnm s4, s6, s2 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: mov v1.b[4], w8 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: fminnm s2, s4, s5 -; CHECK-NEXT: fminnm s0, s0, s5 -; CHECK-NEXT: mov v1.b[5], w8 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v1.b[6], w8 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: mov v1.b[7], w8 -; CHECK-NEXT: fmov d0, d1 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i1: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: fcvtzu w8, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[3] +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: cmp w8, #1 +; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo +; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo +; CHECK-CVT-NEXT: fcvt s3, h1 +; CHECK-CVT-NEXT: cmp w10, #1 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fmov s1, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s3 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: mov v1.b[1], w8 +; CHECK-CVT-NEXT: csinc w8, w10, wzr, lo +; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo +; CHECK-CVT-NEXT: mov v1.b[2], w8 +; CHECK-CVT-NEXT: fcvtzu w8, s2 +; CHECK-CVT-NEXT: mov h2, v0.h[6] +; CHECK-CVT-NEXT: fcvtzu w10, s3 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: cmp w8, #1 +; CHECK-CVT-NEXT: mov v1.b[3], w9 +; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: cmp w10, #1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov v1.b[4], w8 +; CHECK-CVT-NEXT: csinc w8, w10, wzr, lo +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov v1.b[5], w8 +; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo +; CHECK-CVT-NEXT: fcvtzu w9, s0 +; CHECK-CVT-NEXT: mov v1.b[6], w8 +; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo +; CHECK-CVT-NEXT: mov v1.b[7], w8 +; CHECK-CVT-NEXT: fmov d0, d1 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i1: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzu w9, h0 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h3, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu w8, h1 +; CHECK-FP16-NEXT: cmp w8, #1 +; CHECK-FP16-NEXT: csinc w8, w8, wzr, lo +; CHECK-FP16-NEXT: cmp w9, #1 +; CHECK-FP16-NEXT: csinc w9, w9, wzr, lo +; CHECK-FP16-NEXT: fmov s1, w9 +; CHECK-FP16-NEXT: fcvtzu w9, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[4] +; CHECK-FP16-NEXT: mov v1.b[1], w8 +; CHECK-FP16-NEXT: cmp w9, #1 +; CHECK-FP16-NEXT: fcvtzu w8, h3 +; CHECK-FP16-NEXT: csinc w9, w9, wzr, lo +; CHECK-FP16-NEXT: mov h3, v0.h[5] +; CHECK-FP16-NEXT: cmp w8, #1 +; CHECK-FP16-NEXT: mov v1.b[2], w9 +; CHECK-FP16-NEXT: csinc w8, w8, wzr, lo +; CHECK-FP16-NEXT: fcvtzu w9, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[6] +; CHECK-FP16-NEXT: mov h0, v0.h[7] +; CHECK-FP16-NEXT: mov v1.b[3], w8 +; CHECK-FP16-NEXT: cmp w9, #1 +; CHECK-FP16-NEXT: fcvtzu w8, h3 +; CHECK-FP16-NEXT: csinc w9, w9, wzr, lo +; CHECK-FP16-NEXT: cmp w8, #1 +; CHECK-FP16-NEXT: mov v1.b[4], w9 +; CHECK-FP16-NEXT: csinc w8, w8, wzr, lo +; CHECK-FP16-NEXT: fcvtzu w9, h2 +; CHECK-FP16-NEXT: mov v1.b[5], w8 +; CHECK-FP16-NEXT: cmp w9, #1 +; CHECK-FP16-NEXT: csinc w8, w9, wzr, lo +; CHECK-FP16-NEXT: fcvtzu w9, h0 +; CHECK-FP16-NEXT: mov v1.b[6], w8 +; CHECK-FP16-NEXT: cmp w9, #1 +; CHECK-FP16-NEXT: csinc w8, w9, wzr, lo +; CHECK-FP16-NEXT: mov v1.b[7], w8 +; CHECK-FP16-NEXT: fmov d0, d1 +; CHECK-FP16-NEXT: ret %x = call <8 x i1> @llvm.fptoui.sat.v8f16.v8i1(<8 x half> %f) ret <8 x i1> %x } @@ -2223,102 +2308,90 @@ ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i8: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: movi d2, #0000000000000000 -; CHECK-CVT-NEXT: mov w8, #1132396544 +; CHECK-CVT-NEXT: mov w8, #255 ; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: mov s3, v1.s[1] -; CHECK-CVT-NEXT: fmaxnm s4, s1, s2 -; CHECK-CVT-NEXT: fmov s5, w8 -; CHECK-CVT-NEXT: mov s6, v1.s[2] -; CHECK-CVT-NEXT: mov s7, v1.s[3] -; CHECK-CVT-NEXT: fmaxnm s3, s3, s2 -; CHECK-CVT-NEXT: fminnm s4, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s6, s6, s2 -; CHECK-CVT-NEXT: fminnm s3, s3, s5 -; CHECK-CVT-NEXT: fcvtzu w8, s4 -; CHECK-CVT-NEXT: fminnm s4, s6, s5 -; CHECK-CVT-NEXT: mov s6, v0.s[1] -; CHECK-CVT-NEXT: fmov s1, w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fmaxnm s3, s7, s2 -; CHECK-CVT-NEXT: mov v1.b[1], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s4 -; CHECK-CVT-NEXT: fminnm s3, s3, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s0, s2 -; CHECK-CVT-NEXT: mov v1.b[2], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s3, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s6, s2 -; CHECK-CVT-NEXT: mov s6, v0.s[2] +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov s3, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: mov v1.b[1], w9 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: fcvtzu w9, s3 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: mov v1.b[2], w10 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: mov v1.b[3], w9 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: mov s2, v0.s[2] ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: mov v1.b[3], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s3, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s6, s2 -; CHECK-CVT-NEXT: fmaxnm s0, s0, s2 -; CHECK-CVT-NEXT: mov v1.b[4], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s2, s4, s5 -; CHECK-CVT-NEXT: fminnm s0, s0, s5 -; CHECK-CVT-NEXT: mov v1.b[5], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s2 -; CHECK-CVT-NEXT: mov v1.b[6], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s0 +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: mov v1.b[4], w10 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov v1.b[5], w9 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: mov v1.b[6], w9 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo ; CHECK-CVT-NEXT: mov v1.b[7], w8 ; CHECK-CVT-NEXT: fmov d0, d1 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi d2, #0000000000000000 ; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: fcvt s3, h0 -; CHECK-FP16-NEXT: mov w8, #1132396544 -; CHECK-FP16-NEXT: mov h4, v0.h[2] -; CHECK-FP16-NEXT: mov h6, v0.h[3] -; CHECK-FP16-NEXT: fcvt s1, h1 -; CHECK-FP16-NEXT: fmov s5, w8 -; CHECK-FP16-NEXT: fmaxnm s3, s3, s2 -; CHECK-FP16-NEXT: fcvt s4, h4 -; CHECK-FP16-NEXT: fcvt s6, h6 -; CHECK-FP16-NEXT: fmaxnm s1, s1, s2 -; CHECK-FP16-NEXT: fminnm s3, s3, s5 -; CHECK-FP16-NEXT: fmaxnm s4, s4, s2 -; CHECK-FP16-NEXT: fmaxnm s6, s6, s2 -; CHECK-FP16-NEXT: fminnm s7, s1, s5 -; CHECK-FP16-NEXT: fcvtzu w8, s3 -; CHECK-FP16-NEXT: mov h3, v0.h[4] -; CHECK-FP16-NEXT: fminnm s4, s4, s5 -; CHECK-FP16-NEXT: fminnm s6, s6, s5 -; CHECK-FP16-NEXT: fmov s1, w8 -; CHECK-FP16-NEXT: fcvtzu w8, s7 -; CHECK-FP16-NEXT: mov h7, v0.h[5] -; CHECK-FP16-NEXT: fcvt s3, h3 -; CHECK-FP16-NEXT: mov v1.b[1], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s4 -; CHECK-FP16-NEXT: fcvt s4, h7 -; CHECK-FP16-NEXT: fmaxnm s3, s3, s2 -; CHECK-FP16-NEXT: mov h7, v0.h[6] +; CHECK-FP16-NEXT: fcvtzu w10, h0 +; CHECK-FP16-NEXT: mov w8, #255 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h3, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu w9, h1 +; CHECK-FP16-NEXT: cmp w9, #255 +; CHECK-FP16-NEXT: csel w9, w9, w8, lo +; CHECK-FP16-NEXT: cmp w10, #255 +; CHECK-FP16-NEXT: csel w10, w10, w8, lo +; CHECK-FP16-NEXT: fmov s1, w10 +; CHECK-FP16-NEXT: fcvtzu w10, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[4] +; CHECK-FP16-NEXT: mov v1.b[1], w9 +; CHECK-FP16-NEXT: cmp w10, #255 +; CHECK-FP16-NEXT: fcvtzu w9, h3 +; CHECK-FP16-NEXT: csel w10, w10, w8, lo +; CHECK-FP16-NEXT: mov h3, v0.h[5] +; CHECK-FP16-NEXT: cmp w9, #255 +; CHECK-FP16-NEXT: mov v1.b[2], w10 +; CHECK-FP16-NEXT: csel w9, w9, w8, lo +; CHECK-FP16-NEXT: fcvtzu w10, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[6] ; CHECK-FP16-NEXT: mov h0, v0.h[7] -; CHECK-FP16-NEXT: mov v1.b[2], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s6 -; CHECK-FP16-NEXT: fmaxnm s4, s4, s2 -; CHECK-FP16-NEXT: fminnm s3, s3, s5 -; CHECK-FP16-NEXT: fcvt s6, h7 -; CHECK-FP16-NEXT: fcvt s0, h0 -; CHECK-FP16-NEXT: mov v1.b[3], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s3 -; CHECK-FP16-NEXT: fminnm s3, s4, s5 -; CHECK-FP16-NEXT: fmaxnm s4, s6, s2 -; CHECK-FP16-NEXT: fmaxnm s0, s0, s2 -; CHECK-FP16-NEXT: mov v1.b[4], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s3 -; CHECK-FP16-NEXT: fminnm s2, s4, s5 -; CHECK-FP16-NEXT: fminnm s0, s0, s5 -; CHECK-FP16-NEXT: mov v1.b[5], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s2 -; CHECK-FP16-NEXT: mov v1.b[6], w8 -; CHECK-FP16-NEXT: fcvtzu w8, s0 +; CHECK-FP16-NEXT: mov v1.b[3], w9 +; CHECK-FP16-NEXT: cmp w10, #255 +; CHECK-FP16-NEXT: fcvtzu w9, h3 +; CHECK-FP16-NEXT: csel w10, w10, w8, lo +; CHECK-FP16-NEXT: cmp w9, #255 +; CHECK-FP16-NEXT: mov v1.b[4], w10 +; CHECK-FP16-NEXT: csel w9, w9, w8, lo +; CHECK-FP16-NEXT: fcvtzu w10, h2 +; CHECK-FP16-NEXT: mov v1.b[5], w9 +; CHECK-FP16-NEXT: cmp w10, #255 +; CHECK-FP16-NEXT: csel w9, w10, w8, lo +; CHECK-FP16-NEXT: fcvtzu w10, h0 +; CHECK-FP16-NEXT: mov v1.b[6], w9 +; CHECK-FP16-NEXT: cmp w10, #255 +; CHECK-FP16-NEXT: csel w8, w10, w8, lo ; CHECK-FP16-NEXT: mov v1.b[7], w8 ; CHECK-FP16-NEXT: fmov d0, d1 ; CHECK-FP16-NEXT: ret @@ -2327,61 +2400,103 @@ } define <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) { -; CHECK-LABEL: test_unsigned_v8f16_v8i13: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: fcvt s3, h0 -; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: movk w8, #17919, lsl #16 -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: mov h6, v0.h[3] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fmaxnm s3, s3, s2 -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fmaxnm s1, s1, s2 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fmaxnm s4, s4, s2 -; CHECK-NEXT: fmaxnm s6, s6, s2 -; CHECK-NEXT: fminnm s7, s1, s5 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: mov h3, v0.h[4] -; CHECK-NEXT: fminnm s4, s4, s5 -; CHECK-NEXT: fminnm s6, s6, s5 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fcvtzu w8, s7 -; CHECK-NEXT: mov h7, v0.h[5] -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: mov v1.h[1], w8 -; CHECK-NEXT: fcvtzu w8, s4 -; CHECK-NEXT: fcvt s4, h7 -; CHECK-NEXT: fmaxnm s3, s3, s2 -; CHECK-NEXT: mov h7, v0.h[6] -; CHECK-NEXT: mov h0, v0.h[7] -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: fcvtzu w8, s6 -; CHECK-NEXT: fmaxnm s4, s4, s2 -; CHECK-NEXT: fminnm s3, s3, s5 -; CHECK-NEXT: fcvt s6, h7 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: fminnm s3, s4, s5 -; CHECK-NEXT: fmaxnm s4, s6, s2 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: mov v1.h[4], w8 -; CHECK-NEXT: fcvtzu w8, s3 -; CHECK-NEXT: fminnm s2, s4, s5 -; CHECK-NEXT: fminnm s0, s0, s5 -; CHECK-NEXT: mov v1.h[5], w8 -; CHECK-NEXT: fcvtzu w8, s2 -; CHECK-NEXT: mov v1.h[6], w8 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: mov v1.h[7], w8 -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i13: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov w8, #8191 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[3] +; CHECK-CVT-NEXT: fcvtzu w11, s2 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: fcvt s3, h1 +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s3 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: mov v1.h[1], w9 +; CHECK-CVT-NEXT: csel w9, w11, w8, lo +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: mov v1.h[2], w9 +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov h2, v0.h[6] +; CHECK-CVT-NEXT: fcvtzu w11, s3 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: mov v1.h[3], w10 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov v1.h[4], w9 +; CHECK-CVT-NEXT: csel w9, w11, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov v1.h[5], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: mov v1.h[6], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo +; CHECK-CVT-NEXT: mov v1.h[7], w8 +; CHECK-CVT-NEXT: mov v0.16b, v1.16b +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzu w10, h0 +; CHECK-FP16-NEXT: mov w8, #8191 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h3, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu w9, h1 +; CHECK-FP16-NEXT: cmp w9, w8 +; CHECK-FP16-NEXT: csel w9, w9, w8, lo +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w10, w10, w8, lo +; CHECK-FP16-NEXT: fmov s1, w10 +; CHECK-FP16-NEXT: fcvtzu w10, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[4] +; CHECK-FP16-NEXT: mov v1.h[1], w9 +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: fcvtzu w9, h3 +; CHECK-FP16-NEXT: csel w10, w10, w8, lo +; CHECK-FP16-NEXT: mov h3, v0.h[5] +; CHECK-FP16-NEXT: cmp w9, w8 +; CHECK-FP16-NEXT: mov v1.h[2], w10 +; CHECK-FP16-NEXT: csel w9, w9, w8, lo +; CHECK-FP16-NEXT: fcvtzu w10, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[6] +; CHECK-FP16-NEXT: mov h0, v0.h[7] +; CHECK-FP16-NEXT: mov v1.h[3], w9 +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: fcvtzu w9, h3 +; CHECK-FP16-NEXT: csel w10, w10, w8, lo +; CHECK-FP16-NEXT: cmp w9, w8 +; CHECK-FP16-NEXT: mov v1.h[4], w10 +; CHECK-FP16-NEXT: csel w9, w9, w8, lo +; CHECK-FP16-NEXT: fcvtzu w10, h2 +; CHECK-FP16-NEXT: mov v1.h[5], w9 +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w9, w10, w8, lo +; CHECK-FP16-NEXT: fcvtzu w10, h0 +; CHECK-FP16-NEXT: mov v1.h[6], w9 +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: csel w8, w10, w8, lo +; CHECK-FP16-NEXT: mov v1.h[7], w8 +; CHECK-FP16-NEXT: mov v0.16b, v1.16b +; CHECK-FP16-NEXT: ret %x = call <8 x i13> @llvm.fptoui.sat.v8f16.v8i13(<8 x half> %f) ret <8 x i13> %x } @@ -2390,48 +2505,45 @@ ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i16: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: movi d2, #0000000000000000 -; CHECK-CVT-NEXT: mov w8, #65280 +; CHECK-CVT-NEXT: mov w8, #65535 ; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: movk w8, #18303, lsl #16 -; CHECK-CVT-NEXT: mov s3, v1.s[1] -; CHECK-CVT-NEXT: fmaxnm s4, s1, s2 -; CHECK-CVT-NEXT: fmov s5, w8 -; CHECK-CVT-NEXT: mov s6, v1.s[2] -; CHECK-CVT-NEXT: mov s7, v1.s[3] -; CHECK-CVT-NEXT: fmaxnm s3, s3, s2 -; CHECK-CVT-NEXT: fminnm s4, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s6, s6, s2 -; CHECK-CVT-NEXT: fminnm s3, s3, s5 -; CHECK-CVT-NEXT: fcvtzu w8, s4 -; CHECK-CVT-NEXT: fminnm s4, s6, s5 -; CHECK-CVT-NEXT: mov s6, v0.s[1] -; CHECK-CVT-NEXT: fmov s1, w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fmaxnm s3, s7, s2 -; CHECK-CVT-NEXT: mov v1.h[1], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s4 -; CHECK-CVT-NEXT: fminnm s3, s3, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s0, s2 -; CHECK-CVT-NEXT: mov v1.h[2], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s3, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s6, s2 -; CHECK-CVT-NEXT: mov s6, v0.s[2] +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov s3, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: mov v1.h[1], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: fcvtzu w9, s3 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: mov v1.h[2], w10 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: mov v1.h[3], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: mov s2, v0.s[2] ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: mov v1.h[3], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s3, s4, s5 -; CHECK-CVT-NEXT: fmaxnm s4, s6, s2 -; CHECK-CVT-NEXT: fmaxnm s0, s0, s2 -; CHECK-CVT-NEXT: mov v1.h[4], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s3 -; CHECK-CVT-NEXT: fminnm s2, s4, s5 -; CHECK-CVT-NEXT: fminnm s0, s0, s5 -; CHECK-CVT-NEXT: mov v1.h[5], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s2 -; CHECK-CVT-NEXT: mov v1.h[6], w8 -; CHECK-CVT-NEXT: fcvtzu w8, s0 +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: mov v1.h[4], w10 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov v1.h[5], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: mov v1.h[6], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo ; CHECK-CVT-NEXT: mov v1.h[7], w8 ; CHECK-CVT-NEXT: mov v0.16b, v1.16b ; CHECK-CVT-NEXT: ret @@ -2445,52 +2557,85 @@ } define <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) { -; CHECK-LABEL: test_unsigned_v8f16_v8i19: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: movk w8, #18687, lsl #16 -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: mov h5, v0.h[3] -; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov h6, v1.h[1] -; CHECK-NEXT: mov h7, v1.h[2] -; CHECK-NEXT: mov h16, v1.h[3] -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: fmaxnm s3, s3, s2 -; CHECK-NEXT: fmaxnm s4, s4, s2 -; CHECK-NEXT: fmaxnm s5, s5, s2 -; CHECK-NEXT: fmaxnm s1, s1, s2 -; CHECK-NEXT: fmov s17, w8 -; CHECK-NEXT: fmaxnm s6, s6, s2 -; CHECK-NEXT: fmaxnm s7, s7, s2 -; CHECK-NEXT: fmaxnm s2, s16, s2 -; CHECK-NEXT: fminnm s0, s0, s17 -; CHECK-NEXT: fminnm s3, s3, s17 -; CHECK-NEXT: fminnm s4, s4, s17 -; CHECK-NEXT: fminnm s5, s5, s17 -; CHECK-NEXT: fminnm s1, s1, s17 -; CHECK-NEXT: fminnm s6, s6, s17 -; CHECK-NEXT: fminnm s7, s7, s17 -; CHECK-NEXT: fminnm s2, s2, s17 -; CHECK-NEXT: fcvtzu w0, s0 -; CHECK-NEXT: fcvtzu w1, s3 -; CHECK-NEXT: fcvtzu w2, s4 -; CHECK-NEXT: fcvtzu w3, s5 -; CHECK-NEXT: fcvtzu w4, s1 -; CHECK-NEXT: fcvtzu w5, s6 -; CHECK-NEXT: fcvtzu w6, s7 -; CHECK-NEXT: fcvtzu w7, s2 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i19: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-CVT-NEXT: mov w8, #524287 +; CHECK-CVT-NEXT: mov h2, v0.h[1] +; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov h4, v1.h[1] +; CHECK-CVT-NEXT: mov h6, v1.h[2] +; CHECK-CVT-NEXT: mov h7, v1.h[3] +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvtzu w9, s0 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: fcvt s0, h7 +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w11, s2 +; CHECK-CVT-NEXT: fcvtzu w12, s3 +; CHECK-CVT-NEXT: fcvtzu w14, s5 +; CHECK-CVT-NEXT: fcvtzu w13, s4 +; CHECK-CVT-NEXT: fcvtzu w15, s6 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: fcvtzu w16, s0 +; CHECK-CVT-NEXT: csel w4, w10, w8, lo +; CHECK-CVT-NEXT: cmp w13, w8 +; CHECK-CVT-NEXT: csel w5, w13, w8, lo +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w6, w15, w8, lo +; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: csel w7, w16, w8, lo +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w0, w9, w8, lo +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w1, w11, w8, lo +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w2, w12, w8, lo +; CHECK-CVT-NEXT: cmp w14, w8 +; CHECK-CVT-NEXT: csel w3, w14, w8, lo +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i19: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-FP16-NEXT: mov w8, #524287 +; CHECK-FP16-NEXT: mov h2, v0.h[1] +; CHECK-FP16-NEXT: mov h3, v0.h[2] +; CHECK-FP16-NEXT: mov h5, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu w9, h0 +; CHECK-FP16-NEXT: mov h4, v1.h[1] +; CHECK-FP16-NEXT: mov h6, v1.h[2] +; CHECK-FP16-NEXT: mov h0, v1.h[3] +; CHECK-FP16-NEXT: fcvtzu w10, h1 +; CHECK-FP16-NEXT: fcvtzu w11, h2 +; CHECK-FP16-NEXT: fcvtzu w12, h3 +; CHECK-FP16-NEXT: fcvtzu w14, h5 +; CHECK-FP16-NEXT: fcvtzu w13, h4 +; CHECK-FP16-NEXT: fcvtzu w15, h6 +; CHECK-FP16-NEXT: cmp w10, w8 +; CHECK-FP16-NEXT: fcvtzu w16, h0 +; CHECK-FP16-NEXT: csel w4, w10, w8, lo +; CHECK-FP16-NEXT: cmp w13, w8 +; CHECK-FP16-NEXT: csel w5, w13, w8, lo +; CHECK-FP16-NEXT: cmp w15, w8 +; CHECK-FP16-NEXT: csel w6, w15, w8, lo +; CHECK-FP16-NEXT: cmp w16, w8 +; CHECK-FP16-NEXT: csel w7, w16, w8, lo +; CHECK-FP16-NEXT: cmp w9, w8 +; CHECK-FP16-NEXT: csel w0, w9, w8, lo +; CHECK-FP16-NEXT: cmp w11, w8 +; CHECK-FP16-NEXT: csel w1, w11, w8, lo +; CHECK-FP16-NEXT: cmp w12, w8 +; CHECK-FP16-NEXT: csel w2, w12, w8, lo +; CHECK-FP16-NEXT: cmp w14, w8 +; CHECK-FP16-NEXT: csel w3, w14, w8, lo +; CHECK-FP16-NEXT: ret %x = call <8 x i19> @llvm.fptoui.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x } @@ -2536,67 +2681,85 @@ } define <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) { -; CHECK-LABEL: test_unsigned_v8f16_v8i50: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: mov w8, #1484783615 -; CHECK-NEXT: mov h2, v0.h[1] -; CHECK-NEXT: mov x9, #1125899906842623 -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: fcvtzu x8, s1 -; CHECK-NEXT: fcmp s1, #0.0 -; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: csel x8, xzr, x8, lt -; CHECK-NEXT: fcmp s1, s3 -; CHECK-NEXT: mov h1, v0.h[2] -; CHECK-NEXT: fcvtzu x10, s2 -; CHECK-NEXT: csel x0, x9, x8, gt -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s2, s3 -; CHECK-NEXT: mov h2, v0.h[3] -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fcvtzu x10, s1 -; CHECK-NEXT: csel x1, x9, x8, gt -; CHECK-NEXT: fcmp s1, #0.0 -; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s1, s3 -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: fcvtzu x10, s2 -; CHECK-NEXT: csel x2, x9, x8, gt -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s2, s3 -; CHECK-NEXT: mov h2, v0.h[1] -; CHECK-NEXT: fcvtzu x10, s1 -; CHECK-NEXT: csel x3, x9, x8, gt -; CHECK-NEXT: fcmp s1, #0.0 -; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s1, s3 -; CHECK-NEXT: mov h1, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fcvtzu x10, s2 -; CHECK-NEXT: csel x4, x9, x8, gt -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s2, s3 -; CHECK-NEXT: fcvtzu x10, s1 -; CHECK-NEXT: csel x5, x9, x8, gt -; CHECK-NEXT: fcmp s1, #0.0 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s1, s3 -; CHECK-NEXT: fcvtzu x10, s0 -; CHECK-NEXT: csel x6, x9, x8, gt -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: csel x8, xzr, x10, lt -; CHECK-NEXT: fcmp s0, s3 -; CHECK-NEXT: csel x7, x9, x8, gt -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i50: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-CVT-NEXT: mov x8, #1125899906842623 +; CHECK-CVT-NEXT: mov h2, v0.h[1] +; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov h4, v1.h[1] +; CHECK-CVT-NEXT: mov h6, v1.h[2] +; CHECK-CVT-NEXT: mov h7, v1.h[3] +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvtzu x9, s0 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: fcvt s0, h7 +; CHECK-CVT-NEXT: fcvtzu x10, s1 +; CHECK-CVT-NEXT: fcvtzu x11, s2 +; CHECK-CVT-NEXT: fcvtzu x12, s3 +; CHECK-CVT-NEXT: fcvtzu x14, s5 +; CHECK-CVT-NEXT: fcvtzu x13, s4 +; CHECK-CVT-NEXT: fcvtzu x15, s6 +; CHECK-CVT-NEXT: cmp x10, x8 +; CHECK-CVT-NEXT: fcvtzu x16, s0 +; CHECK-CVT-NEXT: csel x4, x10, x8, lo +; CHECK-CVT-NEXT: cmp x13, x8 +; CHECK-CVT-NEXT: csel x5, x13, x8, lo +; CHECK-CVT-NEXT: cmp x15, x8 +; CHECK-CVT-NEXT: csel x6, x15, x8, lo +; CHECK-CVT-NEXT: cmp x16, x8 +; CHECK-CVT-NEXT: csel x7, x16, x8, lo +; CHECK-CVT-NEXT: cmp x9, x8 +; CHECK-CVT-NEXT: csel x0, x9, x8, lo +; CHECK-CVT-NEXT: cmp x11, x8 +; CHECK-CVT-NEXT: csel x1, x11, x8, lo +; CHECK-CVT-NEXT: cmp x12, x8 +; CHECK-CVT-NEXT: csel x2, x12, x8, lo +; CHECK-CVT-NEXT: cmp x14, x8 +; CHECK-CVT-NEXT: csel x3, x14, x8, lo +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i50: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-FP16-NEXT: mov x8, #1125899906842623 +; CHECK-FP16-NEXT: mov h2, v0.h[1] +; CHECK-FP16-NEXT: mov h3, v0.h[2] +; CHECK-FP16-NEXT: mov h5, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu x9, h0 +; CHECK-FP16-NEXT: mov h4, v1.h[1] +; CHECK-FP16-NEXT: mov h6, v1.h[2] +; CHECK-FP16-NEXT: mov h0, v1.h[3] +; CHECK-FP16-NEXT: fcvtzu x10, h1 +; CHECK-FP16-NEXT: fcvtzu x11, h2 +; CHECK-FP16-NEXT: fcvtzu x12, h3 +; CHECK-FP16-NEXT: fcvtzu x14, h5 +; CHECK-FP16-NEXT: fcvtzu x13, h4 +; CHECK-FP16-NEXT: fcvtzu x15, h6 +; CHECK-FP16-NEXT: cmp x10, x8 +; CHECK-FP16-NEXT: fcvtzu x16, h0 +; CHECK-FP16-NEXT: csel x4, x10, x8, lo +; CHECK-FP16-NEXT: cmp x13, x8 +; CHECK-FP16-NEXT: csel x5, x13, x8, lo +; CHECK-FP16-NEXT: cmp x15, x8 +; CHECK-FP16-NEXT: csel x6, x15, x8, lo +; CHECK-FP16-NEXT: cmp x16, x8 +; CHECK-FP16-NEXT: csel x7, x16, x8, lo +; CHECK-FP16-NEXT: cmp x9, x8 +; CHECK-FP16-NEXT: csel x0, x9, x8, lo +; CHECK-FP16-NEXT: cmp x11, x8 +; CHECK-FP16-NEXT: csel x1, x11, x8, lo +; CHECK-FP16-NEXT: cmp x12, x8 +; CHECK-FP16-NEXT: csel x2, x12, x8, lo +; CHECK-FP16-NEXT: cmp x14, x8 +; CHECK-FP16-NEXT: csel x3, x14, x8, lo +; CHECK-FP16-NEXT: ret %x = call <8 x i50> @llvm.fptoui.sat.v8f16.v8i50(<8 x half> %f) ret <8 x i50> %x }