Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -737,6 +737,20 @@
   case ISD::SELECT:
     Results.push_back(ExpandSELECT(Node));
     return;
+  case ISD::SELECT_CC: {
+    if (Node->getValueType(0).isScalableVector()) {
+      EVT CondVT = TLI.getSetCCResultType(
+          DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+      SDValue SetCC =
+          DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
+                      Node->getOperand(1), Node->getOperand(4));
+      Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
+                                      Node->getOperand(2),
+                                      Node->getOperand(3)));
+      return;
+    }
+    break;
+  }
   case ISD::FP_TO_UINT:
     ExpandFP_TO_UINT(Node, Results);
     return;
@@ -833,6 +847,24 @@
       return;
     }
     break;
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT: {
+    EVT FPVT = Node->getOperand(0).getValueType();
+    // Expand the fptoi_sat if it is scalable or the expansion with fmin/fmax
+    // will be legal and the scalar fptoi_sat is not preferred. Otherwise leave
+    // the fptoi_sat to be unrolled below.
+    if (FPVT.isScalableVector() ||
+        (TLI.isOperationLegalOrCustom(ISD::FMINNUM, FPVT) &&
+         !TLI.shouldConvertFpToSat(
+             Node->getOpcode(), FPVT.getScalarType(),
+             cast<VTSDNode>(Node->getOperand(1))->getVT()))) {
+      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
+        Results.push_back(Expanded);
+        return;
+      }
+    }
+    break;
+  }
   case ISD::SMULFIX:
   case ISD::UMULFIX:
     if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6154,8 +6154,8 @@
     assert(N1.getValueType().isVector() == VT.isVector() &&
            "FP_TO_*INT_SAT type should be vector iff the operand type is "
            "vector!");
-    assert((!VT.isVector() || VT.getVectorNumElements() ==
-                                  N1.getValueType().getVectorNumElements()) &&
+    assert((!VT.isVector() || VT.getVectorElementCount() ==
+                                  N1.getValueType().getVectorElementCount()) &&
            "Vector element counts must match in FP_TO_*INT_SAT");
     assert(!cast<VTSDNode>(N2)->getVT().isVector() &&
            "Type to saturate to must be a scalar.");
@@ -8977,6 +8977,11 @@
            "True and False arms of SelectCC must have same type!");
     assert(Ops[2].getValueType() == VT &&
            "select_cc node must be of same type as true and false value!");
+    assert((!Ops[0].getValueType().isVector() ||
+            Ops[0].getValueType().getVectorElementCount() ==
+                VT.getVectorElementCount()) &&
+           "Expected select_cc with vector result to have the same sized "
+           "comparison type!");
     break;
   case ISD::BR_CC:
     assert(NumOps == 5 && "BR_CC takes 5 operands!");
Index: llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -770,19 +770,28 @@
 ; CHECK-LABEL: test_signed_v2f32_v2i50:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: mov x8, #562949953421311
-; CHECK-NEXT: fcvtzs x10, s0
-; CHECK-NEXT: mov x11, #-562949953421312
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: csel x9, x9, x8, lt
-; CHECK-NEXT: cmp x9, x11
-; CHECK-NEXT: csel x9, x9, x11, gt
-; CHECK-NEXT: cmp x10, x8
-; CHECK-NEXT: csel x8, x10, x8, lt
-; CHECK-NEXT: cmp x8, 
x11 -; CHECK-NEXT: csel x8, x8, x11, gt +; CHECK-NEXT: fcvtl v2.2d, v0.2s +; CHECK-NEXT: movi v1.2s, #216, lsl #24 +; CHECK-NEXT: mov s3, v0.s[1] +; CHECK-NEXT: mov w8, #1476395007 +; CHECK-NEXT: mov x10, #562949953421311 +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fmov s4, w8 +; CHECK-NEXT: fcmp s3, s1 +; CHECK-NEXT: mov x8, #-562949953421312 +; CHECK-NEXT: mov x9, v2.d[1] +; CHECK-NEXT: fmov x11, d2 +; CHECK-NEXT: csel x9, x8, x9, lt +; CHECK-NEXT: fcmp s3, s4 +; CHECK-NEXT: csel x9, x10, x9, gt +; CHECK-NEXT: fcmp s3, s3 +; CHECK-NEXT: csel x9, xzr, x9, vs +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csel x8, x8, x11, lt +; CHECK-NEXT: fcmp s0, s4 +; CHECK-NEXT: csel x8, x10, x8, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel x8, xzr, x8, vs ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret @@ -1027,31 +1036,46 @@ define <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i50: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov x9, #562949953421311 -; CHECK-NEXT: mov x10, #-562949953421312 -; CHECK-NEXT: fcvtzs x12, s0 -; CHECK-NEXT: mov s2, v1.s[1] -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: fcvtzs x11, s2 -; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: csel x2, x8, x10, gt -; CHECK-NEXT: cmp x11, x9 -; CHECK-NEXT: csel x8, x11, x9, lt -; CHECK-NEXT: fcvtzs x11, s1 -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: csel x3, x8, x10, gt -; CHECK-NEXT: cmp x12, x9 -; CHECK-NEXT: csel x8, x12, x9, lt -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: csel x0, x8, x10, gt -; CHECK-NEXT: cmp x11, x9 -; CHECK-NEXT: csel x8, x11, x9, lt -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: csel x1, x8, x10, gt +; CHECK-NEXT: fcvtl v1.2d, v0.2s +; CHECK-NEXT: movi v2.2s, #216, lsl #24 +; CHECK-NEXT: mov w8, #1476395007 +; CHECK-NEXT: mov x10, #562949953421311 +; CHECK-NEXT: mov s4, v0.s[1] +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fcmp s0, s2 +; CHECK-NEXT: mov x8, #-562949953421312 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: mov x11, v1.d[1] +; CHECK-NEXT: fcvtl2 v1.2d, v0.4s +; CHECK-NEXT: csel x9, x8, x9, lt +; CHECK-NEXT: fcmp s0, s3 +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: csel x9, x10, x9, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: csel x0, xzr, x9, vs +; CHECK-NEXT: fcmp s4, s2 +; CHECK-NEXT: csel x9, x8, x11, lt +; CHECK-NEXT: fcmp s4, s3 +; CHECK-NEXT: mov x11, v1.d[1] +; CHECK-NEXT: csel x9, x10, x9, gt +; CHECK-NEXT: fcmp s4, s4 +; CHECK-NEXT: mov s4, v0.s[1] +; CHECK-NEXT: csel x1, xzr, x9, vs +; CHECK-NEXT: fcmp s0, s2 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: csel x9, x8, x9, lt +; CHECK-NEXT: fcmp s0, s3 +; CHECK-NEXT: csel x9, x10, x9, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel x2, xzr, x9, vs +; CHECK-NEXT: fcmp s4, s2 +; CHECK-NEXT: csel x8, x8, x11, lt +; CHECK-NEXT: fcmp s4, s3 +; CHECK-NEXT: csel x8, x10, x8, gt +; CHECK-NEXT: fcmp s4, s4 +; CHECK-NEXT: csel x3, xzr, x8, vs ; CHECK-NEXT: ret %x = call <4 x i50> @llvm.fptosi.sat.v4f32.v4i50(<4 x float> %f) ret <4 x i50> %x @@ -1301,17 +1325,19 @@ define <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzs w9, d0 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w8, w8, wzr, lt -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csinv w8, w8, wzr, ge -; 
CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel w9, w9, wzr, lt -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csinv w9, w9, wzr, ge +; CHECK-NEXT: fmov v1.2d, #-1.00000000 +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: fmaxnm v1.2d, v0.2d, v1.2d +; CHECK-NEXT: fminnm v1.2d, v1.2d, v2.2d +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: csel w9, wzr, w9, vs ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -1323,21 +1349,24 @@ define <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: mov w8, #127 -; CHECK-NEXT: fcvtzs w10, d0 -; CHECK-NEXT: mov w11, #-128 -; CHECK-NEXT: fcvtzs w9, d1 -; CHECK-NEXT: cmp w9, #127 -; CHECK-NEXT: csel w9, w9, w8, lt -; CHECK-NEXT: cmn w9, #128 -; CHECK-NEXT: csel w9, w9, w11, gt -; CHECK-NEXT: cmp w10, #127 -; CHECK-NEXT: csel w8, w10, w8, lt -; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: csel w8, w8, w11, gt -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov x8, #-4584664420663164928 +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: mov x8, #211106232532992 +; CHECK-NEXT: movk x8, #16479, lsl #48 +; CHECK-NEXT: fmaxnm v1.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v2.2d, x8 +; CHECK-NEXT: fminnm v1.2d, v1.2d, v2.2d +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptosi.sat.v2f64.v2i8(<2 x double> %f) @@ -1347,21 +1376,24 @@ define <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: mov w8, #4095 -; CHECK-NEXT: fcvtzs w10, d0 -; CHECK-NEXT: mov w11, #-4096 -; CHECK-NEXT: fcvtzs w9, d1 -; CHECK-NEXT: cmp w9, #4095 -; CHECK-NEXT: csel w9, w9, w8, lt -; CHECK-NEXT: cmn w9, #1, lsl #12 // =4096 -; CHECK-NEXT: csel w9, w9, w11, gt -; CHECK-NEXT: cmp w10, #4095 -; CHECK-NEXT: csel w8, w10, w8, lt -; CHECK-NEXT: cmn w8, #1, lsl #12 // =4096 -; CHECK-NEXT: csel w8, w8, w11, gt -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov x8, #-4562146422526312448 +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: mov x8, #279275953455104 +; CHECK-NEXT: movk x8, #16559, lsl #48 +; CHECK-NEXT: fmaxnm v1.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v2.2d, x8 +; CHECK-NEXT: fminnm v1.2d, v1.2d, v2.2d +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i13> @llvm.fptosi.sat.v2f64.v2i13(<2 x double> %f) @@ -1371,21 +1403,24 @@ define <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i16: ; CHECK: // 
%bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: mov w8, #32767 -; CHECK-NEXT: fcvtzs w10, d0 -; CHECK-NEXT: mov w11, #-32768 -; CHECK-NEXT: fcvtzs w9, d1 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w9, w9, w8, lt -; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w9, w9, w11, gt -; CHECK-NEXT: cmp w10, w8 -; CHECK-NEXT: csel w8, w10, w8, lt -; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w8, w8, w11, gt -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov x8, #-4548635623644200960 +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: mov x8, #281200098803712 +; CHECK-NEXT: movk x8, #16607, lsl #48 +; CHECK-NEXT: fmaxnm v1.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v2.2d, x8 +; CHECK-NEXT: fminnm v1.2d, v1.2d, v2.2d +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i16> @llvm.fptosi.sat.v2f64.v2i16(<2 x double> %f) @@ -1395,21 +1430,24 @@ define <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: mov w8, #262143 -; CHECK-NEXT: fcvtzs w10, d0 -; CHECK-NEXT: mov w11, #-262144 -; CHECK-NEXT: fcvtzs w9, d1 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w9, w9, w8, lt -; CHECK-NEXT: cmn w9, #64, lsl #12 // =262144 -; CHECK-NEXT: csel w9, w9, w11, gt -; CHECK-NEXT: cmp w10, w8 -; CHECK-NEXT: csel w8, w10, w8, lt -; CHECK-NEXT: cmn w8, #64, lsl #12 // =262144 -; CHECK-NEXT: csel w8, w8, w11, gt -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov x8, #-4535124824762089472 +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: mov x8, #281440616972288 +; CHECK-NEXT: movk x8, #16655, lsl #48 +; CHECK-NEXT: fmaxnm v1.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v2.2d, x8 +; CHECK-NEXT: fminnm v1.2d, v1.2d, v2.2d +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i19> @llvm.fptosi.sat.v2f64.v2i19(<2 x double> %f) @@ -1433,21 +1471,23 @@ define <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i50: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: mov x8, #562949953421311 -; CHECK-NEXT: fcvtzs x10, d0 -; CHECK-NEXT: mov x11, #-562949953421312 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: csel x9, x9, x8, lt -; CHECK-NEXT: cmp x9, x11 -; CHECK-NEXT: csel x9, x9, x11, gt -; CHECK-NEXT: cmp x10, x8 -; CHECK-NEXT: csel x8, x10, x8, lt -; CHECK-NEXT: cmp x8, x11 -; CHECK-NEXT: csel x8, x8, x11, gt -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: mov x8, #-4395513236313604096 +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: mov x8, #-16 +; CHECK-NEXT: movk x8, #17151, lsl #48 +; CHECK-NEXT: fmaxnm v1.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v2.2d, x8 +; CHECK-NEXT: fminnm v1.2d, v1.2d, v2.2d +; CHECK-NEXT: mov d2, v0.d[1] +; 
CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: mov x8, v1.d[1] +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: csel x8, xzr, x8, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: csel x9, xzr, x9, vs +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %x = call <2 x i50> @llvm.fptosi.sat.v2f64.v2i50(<2 x double> %f) ret <2 x i50> %x @@ -3321,63 +3361,66 @@ define <8 x i8> @test_signed_v8f64_v8i8(<8 x double> %f) { ; CHECK-LABEL: test_signed_v8f64_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v3.d[1] -; CHECK-NEXT: mov w8, #127 -; CHECK-NEXT: fcvtzs w10, d3 -; CHECK-NEXT: mov w11, #-128 -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: fcvtzs w13, d2 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzs w9, d4 -; CHECK-NEXT: mov d4, v2.d[1] -; CHECK-NEXT: fcvtzs w14, d3 -; CHECK-NEXT: cmp w9, #127 -; CHECK-NEXT: csel w9, w9, w8, lt -; CHECK-NEXT: fcvtzs w12, d4 -; CHECK-NEXT: cmn w9, #128 -; CHECK-NEXT: csel w9, w9, w11, gt -; CHECK-NEXT: cmp w10, #127 -; CHECK-NEXT: csel w10, w10, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: csel w10, w10, w11, gt -; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: csel w12, w12, w8, lt -; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: csel w12, w12, w11, gt -; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: fmov s5, w10 -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: csel w13, w13, w11, gt -; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w10, w14, w11, gt -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: fcvtzs w14, d1 -; CHECK-NEXT: csel w15, w15, w8, lt -; CHECK-NEXT: cmn w15, #128 +; CHECK-NEXT: mov x8, #-4584664420663164928 +; CHECK-NEXT: mov d17, v3.d[1] +; CHECK-NEXT: dup v4.2d, x8 +; CHECK-NEXT: mov x8, #211106232532992 +; CHECK-NEXT: movk x8, #16479, lsl #48 +; CHECK-NEXT: fcmp d17, d17 +; CHECK-NEXT: fmaxnm v5.2d, v3.2d, v4.2d +; CHECK-NEXT: fmaxnm v7.2d, v2.2d, v4.2d +; CHECK-NEXT: dup v6.2d, x8 +; CHECK-NEXT: fmaxnm v16.2d, v1.2d, v4.2d +; CHECK-NEXT: fminnm v5.2d, v5.2d, v6.2d +; CHECK-NEXT: fminnm v7.2d, v7.2d, v6.2d +; CHECK-NEXT: fmaxnm v4.2d, v0.2d, v4.2d +; CHECK-NEXT: fminnm v16.2d, v16.2d, v6.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fminnm v4.2d, v4.2d, v6.2d +; CHECK-NEXT: mov d6, v2.d[1] +; CHECK-NEXT: xtn v5.2s, v5.2d +; CHECK-NEXT: xtn v7.2s, v7.2d +; CHECK-NEXT: mov w8, v5.s[1] +; CHECK-NEXT: fmov w9, s5 +; CHECK-NEXT: mov w10, v7.s[1] +; CHECK-NEXT: mov d5, v1.d[1] +; CHECK-NEXT: fmov w11, s7 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d3, d3 +; CHECK-NEXT: fcvtzs v3.2d, v16.2d +; CHECK-NEXT: mov d16, v0.d[1] +; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fcmp d6, d6 +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: csel w10, wzr, w10, vs +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: fcvtzs v2.2d, v4.2d +; CHECK-NEXT: mov w12, v3.s[1] +; CHECK-NEXT: csel w11, wzr, w11, vs +; CHECK-NEXT: fcmp d5, d5 +; CHECK-NEXT: fmov s7, w9 +; CHECK-NEXT: csel w9, wzr, w12, vs +; CHECK-NEXT: fcmp d1, d1 +; CHECK-NEXT: xtn v1.2s, v2.2d +; CHECK-NEXT: mov v7.s[1], w8 +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: mov w12, v1.s[1] +; CHECK-NEXT: fmov s6, w11 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d16, d16 +; CHECK-NEXT: mov v6.s[1], w10 +; CHECK-NEXT: csel w11, wzr, w12, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fmov s5, w8 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: csel w8, wzr, w8, vs ; CHECK-NEXT: mov v5.s[1], w9 -; CHECK-NEXT: csel 
w9, w15, w11, gt -; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: fcvtzs w15, d0 -; CHECK-NEXT: fmov s4, w13 -; CHECK-NEXT: csel w13, w14, w8, lt -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: csel w13, w13, w11, gt -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov v4.s[1], w12 -; CHECK-NEXT: csel w8, w15, w8, lt -; CHECK-NEXT: fmov s3, w9 -; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: csel w8, w8, w11, gt -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fmov s4, w8 ; CHECK-NEXT: adrp x8, .LCPI82_0 -; CHECK-NEXT: mov v2.s[1], w13 +; CHECK-NEXT: mov v4.s[1], w11 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI82_0] -; CHECK-NEXT: tbl v0.8b, { v2.16b, v3.16b, v4.16b, v5.16b }, v0.8b +; CHECK-NEXT: tbl v0.8b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.8b ; CHECK-NEXT: ret %x = call <8 x i8> @llvm.fptosi.sat.v8f64.v8i8(<8 x double> %f) ret <8 x i8> %x @@ -3386,140 +3429,143 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) { ; CHECK-LABEL: test_signed_v16f64_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: mov w8, #127 -; CHECK-NEXT: fcvtzs w11, d0 -; CHECK-NEXT: mov w9, #-128 -; CHECK-NEXT: fcvtzs w13, d1 -; CHECK-NEXT: mov d0, v2.d[1] -; CHECK-NEXT: fcvtzs w14, d2 -; CHECK-NEXT: fcvtzs w10, d16 -; CHECK-NEXT: mov d16, v1.d[1] -; CHECK-NEXT: mov d1, v3.d[1] -; CHECK-NEXT: fcvtzs w15, d0 -; CHECK-NEXT: cmp w10, #127 -; CHECK-NEXT: csel w10, w10, w8, lt -; CHECK-NEXT: fcvtzs w12, d16 -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: csel w11, w11, w8, lt -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: csel w12, w12, w8, lt -; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: csel w11, w13, w9, gt -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: csel w10, w15, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w13, d3 -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: fcvtzs w11, d1 -; CHECK-NEXT: mov w15, v0.s[1] -; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: mov v2.s[1], w12 -; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w12, w14, w9, gt -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: csel w11, w11, w8, lt -; CHECK-NEXT: mov d1, v4.d[1] -; CHECK-NEXT: mov v0.b[1], w15 -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: fmov w14, s2 -; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: fmov s3, w12 -; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov w12, v2.s[1] -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: mov v0.b[2], w14 -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: csel w13, w13, w9, gt -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: fcvtzs w14, d4 +; CHECK-NEXT: mov x8, #-4584664420663164928 +; CHECK-NEXT: mov d22, v0.d[1] +; CHECK-NEXT: dup v17.2d, x8 +; CHECK-NEXT: mov x8, #211106232532992 +; CHECK-NEXT: movk x8, #16479, lsl #48 +; CHECK-NEXT: fcmp d22, d22 +; CHECK-NEXT: fmaxnm v18.2d, v0.2d, v17.2d +; CHECK-NEXT: fmaxnm v20.2d, v1.2d, v17.2d +; CHECK-NEXT: dup v16.2d, x8 +; CHECK-NEXT: fmaxnm v19.2d, v2.2d, v17.2d +; CHECK-NEXT: fminnm v21.2d, v18.2d, v16.2d +; CHECK-NEXT: fminnm v20.2d, v20.2d, v16.2d +; CHECK-NEXT: fmaxnm v18.2d, v3.2d, v17.2d +; CHECK-NEXT: fminnm v19.2d, v19.2d, v16.2d +; CHECK-NEXT: fcvtzs v23.2d, v21.2d +; CHECK-NEXT: fcvtzs v20.2d, v20.2d +; 
CHECK-NEXT: fminnm v18.2d, v18.2d, v16.2d +; CHECK-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-NEXT: xtn v23.2s, v23.2d +; CHECK-NEXT: xtn v20.2s, v20.2d +; CHECK-NEXT: mov w8, v23.s[1] +; CHECK-NEXT: fmov w9, s23 +; CHECK-NEXT: mov w10, v20.s[1] +; CHECK-NEXT: mov d23, v2.d[1] +; CHECK-NEXT: fmov w11, s20 +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: fmaxnm v21.2d, v4.2d, v17.2d +; CHECK-NEXT: mov d20, v3.d[1] +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: mov d0, v1.d[1] +; CHECK-NEXT: fmaxnm v24.2d, v5.2d, v17.2d +; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fmaxnm v22.2d, v6.2d, v17.2d +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: fmaxnm v17.2d, v7.2d, v17.2d +; CHECK-NEXT: csel w10, wzr, w10, vs +; CHECK-NEXT: fcmp d1, d1 +; CHECK-NEXT: xtn v1.2s, v19.2d +; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov w12, v1.s[1] +; CHECK-NEXT: csel w9, wzr, w11, vs +; CHECK-NEXT: fcmp d23, d23 +; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: fminnm v21.2d, v21.2d, v16.2d +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: csel w8, wzr, w12, vs +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: fminnm v19.2d, v24.2d, v16.2d +; CHECK-NEXT: mov w12, v0.s[1] +; CHECK-NEXT: fminnm v22.2d, v22.2d, v16.2d +; CHECK-NEXT: fminnm v2.2d, v17.2d, v16.2d +; CHECK-NEXT: csel w9, wzr, w11, vs +; CHECK-NEXT: xtn v16.2s, v18.2d +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: fcmp d20, d20 +; CHECK-NEXT: mov w11, v16.s[1] +; CHECK-NEXT: fcvtzs v17.2d, v21.2d +; CHECK-NEXT: fmov w10, s16 +; CHECK-NEXT: mov v0.b[1], w12 +; CHECK-NEXT: mov d16, v4.d[1] +; CHECK-NEXT: fmov w12, s1 +; CHECK-NEXT: csel w11, wzr, w11, vs +; CHECK-NEXT: fcmp d3, d3 +; CHECK-NEXT: fmov s3, w9 +; CHECK-NEXT: xtn v17.2s, v17.2d +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: fcvtzs v18.2d, v19.2d +; CHECK-NEXT: mov v0.b[2], w12 ; CHECK-NEXT: mov d1, v5.d[1] -; CHECK-NEXT: mov v0.b[3], w12 -; CHECK-NEXT: fmov s4, w13 -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: fmov w13, s3 -; CHECK-NEXT: csel w10, w15, w8, lt -; CHECK-NEXT: mov w12, v3.s[1] -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: mov v0.b[4], w13 -; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: mov v4.s[1], w11 -; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w14, w14, w9, gt -; CHECK-NEXT: fcvtzs w13, d5 -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov d2, v6.d[1] -; CHECK-NEXT: mov v0.b[5], w12 -; CHECK-NEXT: csel w11, w15, w8, lt -; CHECK-NEXT: fmov w12, s4 -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: fmov s1, w14 -; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov w14, v4.s[1] +; CHECK-NEXT: mov v3.s[1], w8 +; CHECK-NEXT: csel w8, wzr, w10, vs +; CHECK-NEXT: mov w10, v17.s[1] +; CHECK-NEXT: fcmp d16, d16 +; CHECK-NEXT: fcvtzs v19.2d, v22.2d +; CHECK-NEXT: mov v0.b[3], w9 +; CHECK-NEXT: mov d16, v6.d[1] +; CHECK-NEXT: mov w9, v3.s[1] +; CHECK-NEXT: fmov w12, s3 +; CHECK-NEXT: xtn v3.2s, v18.2d +; CHECK-NEXT: fmov s18, w8 +; CHECK-NEXT: csel w8, wzr, w10, vs +; CHECK-NEXT: fcmp d4, d4 +; CHECK-NEXT: fmov w10, s17 +; CHECK-NEXT: mov v0.b[4], w12 +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: mov v18.s[1], w11 +; CHECK-NEXT: csel w10, wzr, w10, vs +; CHECK-NEXT: mov w11, v3.s[1] +; CHECK-NEXT: fcmp d1, d1 +; CHECK-NEXT: xtn v1.2s, v19.2d +; CHECK-NEXT: mov v0.b[5], w9 +; CHECK-NEXT: fmov s4, w10 +; CHECK-NEXT: fmov w12, s18 +; CHECK-NEXT: csel w10, wzr, w11, vs +; CHECK-NEXT: fcmp d5, d5 +; CHECK-NEXT: mov w9, v18.s[1] +; 
CHECK-NEXT: mov v4.s[1], w8 +; CHECK-NEXT: fmov w8, s3 ; CHECK-NEXT: mov v0.b[6], w12 -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: fcvtzs w15, d2 -; CHECK-NEXT: csel w13, w13, w9, gt -; CHECK-NEXT: fcvtzs w10, d6 -; CHECK-NEXT: mov v0.b[7], w14 -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: fmov w14, s1 -; CHECK-NEXT: csel w12, w15, w8, lt -; CHECK-NEXT: fmov s2, w13 -; CHECK-NEXT: mov w13, v1.s[1] -; CHECK-NEXT: mov d1, v7.d[1] -; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: fcvtzs w15, d7 -; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: cmp w10, #127 -; CHECK-NEXT: mov v0.b[8], w14 -; CHECK-NEXT: csel w10, w10, w8, lt -; CHECK-NEXT: mov v2.s[1], w11 -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w11, d1 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: mov v0.b[9], w13 -; CHECK-NEXT: fmov w14, s2 -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: mov w13, v2.s[1] -; CHECK-NEXT: mov v0.b[10], w14 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov v1.s[1], w12 -; CHECK-NEXT: csel w8, w15, w8, lt -; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: mov v0.b[11], w13 -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov v0.b[12], w9 -; CHECK-NEXT: mov v2.s[1], w10 +; CHECK-NEXT: mov w11, v1.s[1] +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d16, d16 +; CHECK-NEXT: fmov w12, s4 +; CHECK-NEXT: mov v0.b[7], w9 +; CHECK-NEXT: mov w9, v4.s[1] +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: csel w8, wzr, w11, vs +; CHECK-NEXT: fcmp d6, d6 +; CHECK-NEXT: mov d4, v7.d[1] +; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: fmov w10, s1 +; CHECK-NEXT: mov v0.b[8], w12 +; CHECK-NEXT: xtn v1.2s, v2.2d +; CHECK-NEXT: csel w10, wzr, w10, vs +; CHECK-NEXT: fcmp d4, d4 +; CHECK-NEXT: fmov w11, s3 +; CHECK-NEXT: mov w12, v3.s[1] +; CHECK-NEXT: mov v0.b[9], w9 +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: mov v0.b[10], w11 +; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fcmp d7, d7 +; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov v0.b[11], w12 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fmov w10, s2 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: mov w8, v2.s[1] +; CHECK-NEXT: mov v0.b[12], w10 +; CHECK-NEXT: mov v1.s[1], w9 ; CHECK-NEXT: mov v0.b[13], w8 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov w9, v2.s[1] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: mov v0.b[14], w8 ; CHECK-NEXT: mov v0.b[15], w9 ; CHECK-NEXT: ret @@ -3530,63 +3576,66 @@ define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) { ; CHECK-LABEL: test_signed_v8f64_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v3.d[1] -; CHECK-NEXT: mov w8, #32767 -; CHECK-NEXT: fcvtzs w10, d3 -; CHECK-NEXT: mov w11, #-32768 -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: fcvtzs w13, d2 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzs w9, d4 -; CHECK-NEXT: mov d4, v2.d[1] -; CHECK-NEXT: fcvtzs w14, d3 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w9, w9, w8, lt -; CHECK-NEXT: fcvtzs w12, d4 -; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w9, w9, w11, gt -; CHECK-NEXT: cmp w10, w8 -; CHECK-NEXT: csel w10, w10, w8, lt -; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w10, w10, w11, gt -; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: csel w12, w12, w8, 
lt -; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w12, w12, w11, gt -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: fmov s5, w10 -; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w13, w13, w11, gt -; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w10, w14, w11, gt -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: fcvtzs w14, d1 -; CHECK-NEXT: csel w15, w15, w8, lt -; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 +; CHECK-NEXT: mov x8, #-4548635623644200960 +; CHECK-NEXT: mov d17, v3.d[1] +; CHECK-NEXT: dup v4.2d, x8 +; CHECK-NEXT: mov x8, #281200098803712 +; CHECK-NEXT: movk x8, #16607, lsl #48 +; CHECK-NEXT: fcmp d17, d17 +; CHECK-NEXT: fmaxnm v5.2d, v3.2d, v4.2d +; CHECK-NEXT: fmaxnm v7.2d, v2.2d, v4.2d +; CHECK-NEXT: dup v6.2d, x8 +; CHECK-NEXT: fmaxnm v16.2d, v1.2d, v4.2d +; CHECK-NEXT: fminnm v5.2d, v5.2d, v6.2d +; CHECK-NEXT: fminnm v7.2d, v7.2d, v6.2d +; CHECK-NEXT: fmaxnm v4.2d, v0.2d, v4.2d +; CHECK-NEXT: fminnm v16.2d, v16.2d, v6.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fminnm v4.2d, v4.2d, v6.2d +; CHECK-NEXT: mov d6, v2.d[1] +; CHECK-NEXT: xtn v5.2s, v5.2d +; CHECK-NEXT: xtn v7.2s, v7.2d +; CHECK-NEXT: mov w8, v5.s[1] +; CHECK-NEXT: fmov w9, s5 +; CHECK-NEXT: mov w10, v7.s[1] +; CHECK-NEXT: mov d5, v1.d[1] +; CHECK-NEXT: fmov w11, s7 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d3, d3 +; CHECK-NEXT: fcvtzs v3.2d, v16.2d +; CHECK-NEXT: mov d16, v0.d[1] +; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fcmp d6, d6 +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: csel w10, wzr, w10, vs +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: fcvtzs v2.2d, v4.2d +; CHECK-NEXT: mov w12, v3.s[1] +; CHECK-NEXT: csel w11, wzr, w11, vs +; CHECK-NEXT: fcmp d5, d5 +; CHECK-NEXT: fmov s7, w9 +; CHECK-NEXT: csel w9, wzr, w12, vs +; CHECK-NEXT: fcmp d1, d1 +; CHECK-NEXT: xtn v1.2s, v2.2d +; CHECK-NEXT: mov v7.s[1], w8 +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: mov w12, v1.s[1] +; CHECK-NEXT: fmov s6, w11 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fcmp d16, d16 +; CHECK-NEXT: mov v6.s[1], w10 +; CHECK-NEXT: csel w11, wzr, w12, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fmov s5, w8 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: csel w8, wzr, w8, vs ; CHECK-NEXT: mov v5.s[1], w9 -; CHECK-NEXT: csel w9, w15, w11, gt -; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: fcvtzs w15, d0 -; CHECK-NEXT: fmov s4, w13 -; CHECK-NEXT: csel w13, w14, w8, lt -; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w13, w13, w11, gt -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: mov v4.s[1], w12 -; CHECK-NEXT: csel w8, w15, w8, lt -; CHECK-NEXT: fmov s3, w9 -; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w8, w8, w11, gt -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fmov s4, w8 ; CHECK-NEXT: adrp x8, .LCPI84_0 -; CHECK-NEXT: mov v2.s[1], w13 +; CHECK-NEXT: mov v4.s[1], w11 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b ; CHECK-NEXT: ret %x = call <8 x i16> @llvm.fptosi.sat.v8f64.v8i16(<8 x double> %f) ret <8 x i16> %x @@ -3595,114 +3644,117 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) { ; CHECK-LABEL: test_signed_v16f64_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v3.d[1] -; CHECK-NEXT: mov w9, #32767 -; CHECK-NEXT: fcvtzs w11, d3 -; 
CHECK-NEXT: mov w8, #-32768 -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: fcvtzs w14, d2 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: mov d1, v7.d[1] -; CHECK-NEXT: fcvtzs w10, d16 -; CHECK-NEXT: mov d16, v2.d[1] -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: fcvtzs w18, d0 -; CHECK-NEXT: mov d0, v6.d[1] -; CHECK-NEXT: fcvtzs w0, d7 -; CHECK-NEXT: cmp w10, w9 -; CHECK-NEXT: fcvtzs w2, d6 -; CHECK-NEXT: csel w10, w10, w9, lt -; CHECK-NEXT: fcvtzs w12, d16 -; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-NEXT: fcvtzs w17, d2 -; CHECK-NEXT: csel w10, w10, w8, gt -; CHECK-NEXT: cmp w11, w9 -; CHECK-NEXT: csel w11, w11, w9, lt -; CHECK-NEXT: fcvtzs w1, d0 -; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NEXT: mov d0, v4.d[1] -; CHECK-NEXT: csel w13, w11, w8, gt -; CHECK-NEXT: cmp w12, w9 -; CHECK-NEXT: csel w11, w12, w9, lt -; CHECK-NEXT: fcvtzs w12, d3 -; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w11, w11, w8, gt -; CHECK-NEXT: cmp w14, w9 -; CHECK-NEXT: csel w14, w14, w9, lt -; CHECK-NEXT: fmov s19, w13 -; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w14, w14, w8, gt -; CHECK-NEXT: cmp w12, w9 -; CHECK-NEXT: csel w12, w12, w9, lt -; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w12, w12, w8, gt -; CHECK-NEXT: cmp w15, w9 -; CHECK-NEXT: csel w15, w15, w9, lt -; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w16, w15, w8, gt -; CHECK-NEXT: cmp w17, w9 -; CHECK-NEXT: csel w15, w17, w9, lt -; CHECK-NEXT: fcvtzs w17, d1 -; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 +; CHECK-NEXT: mov x8, #-4548635623644200960 +; CHECK-NEXT: mov d25, v3.d[1] +; CHECK-NEXT: dup v17.2d, x8 +; CHECK-NEXT: mov x8, #281200098803712 +; CHECK-NEXT: movk x8, #16607, lsl #48 +; CHECK-NEXT: fcmp d25, d25 +; CHECK-NEXT: fmaxnm v18.2d, v3.2d, v17.2d +; CHECK-NEXT: fmaxnm v19.2d, v2.2d, v17.2d +; CHECK-NEXT: dup v16.2d, x8 +; CHECK-NEXT: fmaxnm v20.2d, v1.2d, v17.2d +; CHECK-NEXT: fminnm v18.2d, v18.2d, v16.2d +; CHECK-NEXT: fminnm v19.2d, v19.2d, v16.2d +; CHECK-NEXT: fmaxnm v21.2d, v0.2d, v17.2d +; CHECK-NEXT: fmaxnm v22.2d, v7.2d, v17.2d +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: fmaxnm v23.2d, v6.2d, v17.2d +; CHECK-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-NEXT: fmaxnm v24.2d, v5.2d, v17.2d +; CHECK-NEXT: fmaxnm v17.2d, v4.2d, v17.2d +; CHECK-NEXT: xtn v18.2s, v18.2d +; CHECK-NEXT: fminnm v20.2d, v20.2d, v16.2d +; CHECK-NEXT: mov w8, v18.s[1] +; CHECK-NEXT: fminnm v21.2d, v21.2d, v16.2d +; CHECK-NEXT: fmov w9, s18 +; CHECK-NEXT: fminnm v22.2d, v22.2d, v16.2d +; CHECK-NEXT: fminnm v23.2d, v23.2d, v16.2d +; CHECK-NEXT: fminnm v24.2d, v24.2d, v16.2d +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: fminnm v16.2d, v17.2d, v16.2d +; CHECK-NEXT: mov d17, v2.d[1] +; CHECK-NEXT: fcmp d3, d3 +; CHECK-NEXT: xtn v3.2s, v19.2d +; CHECK-NEXT: fcvtzs v19.2d, v20.2d +; CHECK-NEXT: mov w10, v3.s[1] +; CHECK-NEXT: fmov w11, s3 +; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fcmp d17, d17 +; CHECK-NEXT: mov d17, v1.d[1] +; CHECK-NEXT: mov d3, v0.d[1] +; CHECK-NEXT: xtn v18.2s, v19.2d +; CHECK-NEXT: csel w10, wzr, w10, vs +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: fcvtzs v2.2d, v21.2d +; CHECK-NEXT: mov w12, v18.s[1] +; CHECK-NEXT: fmov w13, s18 +; CHECK-NEXT: csel w11, wzr, w11, vs +; CHECK-NEXT: fcmp d17, d17 +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: csel w12, wzr, w12, vs +; CHECK-NEXT: fcmp d1, d1 +; CHECK-NEXT: fcvtzs v1.2d, v22.2d +; CHECK-NEXT: mov w14, v2.s[1] +; CHECK-NEXT: fmov w15, s2 +; CHECK-NEXT: mov d2, v6.d[1] +; CHECK-NEXT: csel 
w13, wzr, w13, vs +; CHECK-NEXT: fcmp d3, d3 +; CHECK-NEXT: mov d3, v7.d[1] +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: csel w14, wzr, w14, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fcvtzs v0.2d, v23.2d +; CHECK-NEXT: mov w16, v1.s[1] +; CHECK-NEXT: fmov w17, s1 ; CHECK-NEXT: mov d1, v5.d[1] -; CHECK-NEXT: csel w15, w15, w8, gt -; CHECK-NEXT: cmp w18, w9 -; CHECK-NEXT: csel w18, w18, w9, lt -; CHECK-NEXT: cmn w18, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w18, w18, w8, gt -; CHECK-NEXT: cmp w17, w9 -; CHECK-NEXT: csel w17, w17, w9, lt -; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w17, w17, w8, gt -; CHECK-NEXT: cmp w0, w9 -; CHECK-NEXT: csel w0, w0, w9, lt -; CHECK-NEXT: cmn w0, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w13, w0, w8, gt -; CHECK-NEXT: cmp w1, w9 -; CHECK-NEXT: csel w1, w1, w9, lt -; CHECK-NEXT: fcvtzs w0, d1 -; CHECK-NEXT: cmn w1, #8, lsl #12 // =32768 -; CHECK-NEXT: mov v19.s[1], w10 -; CHECK-NEXT: csel w10, w1, w8, gt -; CHECK-NEXT: cmp w2, w9 -; CHECK-NEXT: fcvtzs w1, d5 -; CHECK-NEXT: csel w2, w2, w9, lt -; CHECK-NEXT: fmov s18, w14 -; CHECK-NEXT: cmn w2, #8, lsl #12 // =32768 -; CHECK-NEXT: fmov s23, w13 -; CHECK-NEXT: csel w2, w2, w8, gt -; CHECK-NEXT: cmp w0, w9 -; CHECK-NEXT: csel w14, w0, w9, lt -; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w13, w14, w8, gt -; CHECK-NEXT: cmp w1, w9 -; CHECK-NEXT: fcvtzs w14, d0 -; CHECK-NEXT: csel w0, w1, w9, lt -; CHECK-NEXT: cmn w0, #8, lsl #12 // =32768 -; CHECK-NEXT: mov v18.s[1], w11 -; CHECK-NEXT: csel w11, w0, w8, gt -; CHECK-NEXT: mov v23.s[1], w17 -; CHECK-NEXT: cmp w14, w9 -; CHECK-NEXT: fcvtzs w17, d4 -; CHECK-NEXT: csel w14, w14, w9, lt -; CHECK-NEXT: fmov s22, w2 -; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w14, w14, w8, gt -; CHECK-NEXT: fmov s17, w16 -; CHECK-NEXT: cmp w17, w9 -; CHECK-NEXT: mov v22.s[1], w10 -; CHECK-NEXT: csel w9, w17, w9, lt +; CHECK-NEXT: csel w15, wzr, w15, vs +; CHECK-NEXT: fcmp d3, d3 +; CHECK-NEXT: fcvtzs v3.2d, v24.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: csel w16, wzr, w16, vs +; CHECK-NEXT: fcmp d7, d7 +; CHECK-NEXT: mov w18, v0.s[1] +; CHECK-NEXT: csel w17, wzr, w17, vs +; CHECK-NEXT: fcmp d2, d2 +; CHECK-NEXT: xtn v2.2s, v3.2d +; CHECK-NEXT: fcvtzs v3.2d, v16.2d +; CHECK-NEXT: fmov s19, w9 +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: csel w18, wzr, w18, vs +; CHECK-NEXT: fcmp d6, d6 +; CHECK-NEXT: mov w0, v2.s[1] +; CHECK-NEXT: fmov s23, w17 +; CHECK-NEXT: mov v19.s[1], w8 +; CHECK-NEXT: mov d0, v4.d[1] +; CHECK-NEXT: csel w9, wzr, w9, vs +; CHECK-NEXT: fcmp d1, d1 +; CHECK-NEXT: fmov s18, w11 +; CHECK-NEXT: xtn v1.2s, v3.2d +; CHECK-NEXT: fmov w11, s2 +; CHECK-NEXT: csel w8, wzr, w0, vs +; CHECK-NEXT: fcmp d5, d5 +; CHECK-NEXT: mov v23.s[1], w16 +; CHECK-NEXT: mov w16, v1.s[1] +; CHECK-NEXT: fmov s22, w9 +; CHECK-NEXT: csel w11, wzr, w11, vs +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: mov v18.s[1], w10 +; CHECK-NEXT: fmov w10, s1 +; CHECK-NEXT: mov v22.s[1], w18 +; CHECK-NEXT: csel w9, wzr, w16, vs +; CHECK-NEXT: fcmp d4, d4 +; CHECK-NEXT: fmov s17, w13 ; CHECK-NEXT: fmov s21, w11 -; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: adrp x9, .LCPI85_0 +; CHECK-NEXT: csel w10, wzr, w10, vs ; CHECK-NEXT: mov v17.s[1], w12 -; CHECK-NEXT: mov v21.s[1], w13 -; CHECK-NEXT: fmov s16, w18 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI85_0] -; CHECK-NEXT: fmov s20, w8 -; CHECK-NEXT: mov v16.s[1], w15 -; CHECK-NEXT: mov v20.s[1], w14 +; CHECK-NEXT: mov v21.s[1], w8 +; CHECK-NEXT: adrp x8, 
.LCPI85_0 +; CHECK-NEXT: fmov s16, w15 +; CHECK-NEXT: fmov s20, w10 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI85_0] +; CHECK-NEXT: mov v16.s[1], w14 +; CHECK-NEXT: mov v20.s[1], w9 ; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b ; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -689,14 +689,22 @@ ; CHECK-LABEL: test_unsigned_v2f32_v2i50: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fcvtzu x9, s0 -; CHECK-NEXT: mov x10, #1125899906842623 -; CHECK-NEXT: fcvtzu x8, s1 -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: csel x8, x8, x10, lo -; CHECK-NEXT: cmp x9, x10 -; CHECK-NEXT: csel x9, x9, x10, lo +; CHECK-NEXT: fcvtl v1.2d, v0.2s +; CHECK-NEXT: mov s2, v0.s[1] +; CHECK-NEXT: mov w8, #1484783615 +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcmp s2, #0.0 +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x10, d1 +; CHECK-NEXT: csel x8, xzr, x9, lt +; CHECK-NEXT: fcmp s2, s3 +; CHECK-NEXT: mov x9, #1125899906842623 +; CHECK-NEXT: csel x8, x9, x8, gt +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel x10, xzr, x10, lt +; CHECK-NEXT: fcmp s0, s3 +; CHECK-NEXT: csel x9, x9, x10, gt ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret @@ -903,22 +911,36 @@ define <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i50: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov x8, #1125899906842623 +; CHECK-NEXT: fcvtl v1.2d, v0.2s +; CHECK-NEXT: mov w8, #1484783615 +; CHECK-NEXT: fcmp s0, #0.0 ; CHECK-NEXT: mov s3, v0.s[1] -; CHECK-NEXT: fcvtzu x11, s0 -; CHECK-NEXT: mov s2, v1.s[1] -; CHECK-NEXT: fcvtzu x9, s1 -; CHECK-NEXT: fcvtzu x12, s3 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: fcvtzu x10, s2 -; CHECK-NEXT: csel x2, x9, x8, lo -; CHECK-NEXT: cmp x10, x8 -; CHECK-NEXT: csel x3, x10, x8, lo -; CHECK-NEXT: cmp x11, x8 -; CHECK-NEXT: csel x0, x11, x8, lo -; CHECK-NEXT: cmp x12, x8 -; CHECK-NEXT: csel x1, x12, x8, lo +; CHECK-NEXT: mov x10, #1125899906842623 +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fcvtl2 v1.2d, v0.4s +; CHECK-NEXT: csel x8, xzr, x8, lt +; CHECK-NEXT: fcmp s0, s2 +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: csel x0, x10, x8, gt +; CHECK-NEXT: fcmp s3, #0.0 +; CHECK-NEXT: csel x8, xzr, x9, lt +; CHECK-NEXT: fcmp s3, s2 +; CHECK-NEXT: mov s3, v0.s[1] +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: csel x1, x10, x8, gt +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: csel x8, xzr, x8, lt +; CHECK-NEXT: fcmp s0, s2 +; CHECK-NEXT: csel x2, x10, x8, gt +; CHECK-NEXT: fcmp s3, #0.0 +; CHECK-NEXT: csel x8, xzr, x9, lt +; CHECK-NEXT: fcmp s3, s2 +; CHECK-NEXT: csel x3, x10, x8, gt ; CHECK-NEXT: ret %x = call <4 x i50> @llvm.fptoui.sat.v4f32.v4i50(<4 x float> %f) ret <4 x i50> %x @@ -1126,16 +1148,12 @@ define <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzu w9, d0 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: cmp w8, #1 -; CHECK-NEXT: csinc w8, w8, wzr, 
lo -; CHECK-NEXT: cmp w9, #1 -; CHECK-NEXT: csinc w9, w9, wzr, lo -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fmov v1.2d, #1.00000000 +; CHECK-NEXT: fminnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: ret %x = call <2 x i1> @llvm.fptoui.sat.v2f64.v2i1(<2 x double> %f) ret <2 x i1> %x @@ -1144,17 +1162,14 @@ define <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzu w10, d0 -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: csel w8, w10, w8, lo -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: mov x8, #246290604621824 +; CHECK-NEXT: movk x8, #16495, lsl #48 +; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: fminnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptoui.sat.v2f64.v2i8(<2 x double> %f) ret <2 x i8> %x @@ -1163,17 +1178,14 @@ define <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzu w9, d0 -; CHECK-NEXT: mov w10, #8191 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: cmp w8, w10 -; CHECK-NEXT: csel w8, w8, w10, lo -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w9, w9, w10, lo -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: mov x8, #280375465082880 +; CHECK-NEXT: movk x8, #16575, lsl #48 +; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: fminnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: ret %x = call <2 x i13> @llvm.fptoui.sat.v2f64.v2i13(<2 x double> %f) ret <2 x i13> %x @@ -1182,17 +1194,14 @@ define <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzu w9, d0 -; CHECK-NEXT: mov w10, #65535 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: cmp w8, w10 -; CHECK-NEXT: csel w8, w8, w10, lo -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w9, w9, w10, lo -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: mov x8, #281337537757184 +; CHECK-NEXT: movk x8, #16623, lsl #48 +; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: fminnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: ret %x = call <2 x i16> @llvm.fptoui.sat.v2f64.v2i16(<2 x double> %f) ret <2 x i16> %x @@ -1201,17 +1210,14 @@ define <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzu w9, d0 -; CHECK-NEXT: mov w10, #524287 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: cmp w8, w10 -; CHECK-NEXT: 
csel w8, w8, w10, lo -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w9, w9, w10, lo -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: mov x8, #281457796841472 +; CHECK-NEXT: movk x8, #16671, lsl #48 +; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: fminnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: ret %x = call <2 x i19> @llvm.fptoui.sat.v2f64.v2i19(<2 x double> %f) ret <2 x i19> %x @@ -1234,16 +1240,13 @@ define <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i50: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzu x9, d0 -; CHECK-NEXT: mov x10, #1125899906842623 -; CHECK-NEXT: fcvtzu x8, d1 -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: csel x8, x8, x10, lo -; CHECK-NEXT: cmp x9, x10 -; CHECK-NEXT: csel x9, x9, x10, lo -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: mov x8, #-8 +; CHECK-NEXT: movk x8, #17167, lsl #48 +; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: fminnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d ; CHECK-NEXT: ret %x = call <2 x i50> @llvm.fptoui.sat.v2f64.v2i50(<2 x double> %f) ret <2 x i50> %x @@ -2768,46 +2771,29 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) { ; CHECK-LABEL: test_unsigned_v8f64_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v3.d[1] -; CHECK-NEXT: fcvtzu w10, d3 -; CHECK-NEXT: mov d3, v2.d[1] -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: fcvtzu w12, d2 -; CHECK-NEXT: fcvtzu w13, d1 -; CHECK-NEXT: fcvtzu w9, d4 -; CHECK-NEXT: mov d4, v1.d[1] -; CHECK-NEXT: fcvtzu w11, d3 -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: csel w12, w12, w8, lo -; CHECK-NEXT: fmov s19, w10 -; CHECK-NEXT: fcvtzu w10, d4 -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v19.s[1], w9 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: fmov s18, w12 -; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: csel w12, w13, w8, lo -; CHECK-NEXT: fcvtzu w13, d0 -; CHECK-NEXT: mov v18.s[1], w11 -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: fmov s17, w12 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: mov v17.s[1], w10 -; CHECK-NEXT: fmov s16, w8 +; CHECK-NEXT: movi v4.2d, #0000000000000000 +; CHECK-NEXT: mov x8, #246290604621824 +; CHECK-NEXT: movk x8, #16495, lsl #48 +; CHECK-NEXT: fmaxnm v3.2d, v3.2d, v4.2d +; CHECK-NEXT: dup v5.2d, x8 +; CHECK-NEXT: fmaxnm v2.2d, v2.2d, v4.2d ; CHECK-NEXT: adrp x8, .LCPI82_0 -; CHECK-NEXT: mov v16.s[1], w9 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI82_0] -; CHECK-NEXT: tbl v0.8b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.8b +; CHECK-NEXT: fmaxnm v1.2d, v1.2d, v4.2d +; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v4.2d +; CHECK-NEXT: fminnm v3.2d, v3.2d, v5.2d +; CHECK-NEXT: fminnm v2.2d, v2.2d, v5.2d +; CHECK-NEXT: fminnm v1.2d, v1.2d, v5.2d +; CHECK-NEXT: fminnm v0.2d, v0.2d, v5.2d +; CHECK-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: xtn v6.2s, v3.2d +; CHECK-NEXT: 
xtn v5.2s, v2.2d +; CHECK-NEXT: xtn v4.2s, v1.2d +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI82_0] +; CHECK-NEXT: xtn v3.2s, v0.2d +; CHECK-NEXT: tbl v0.8b, { v3.16b, v4.16b, v5.16b, v6.16b }, v1.8b ; CHECK-NEXT: ret %x = call <8 x i8> @llvm.fptoui.sat.v8f64.v8i8(<8 x double> %f) ret <8 x i8> %x @@ -2816,105 +2802,68 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { ; CHECK-LABEL: test_unsigned_v16f64_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: fcvtzu w10, d0 -; CHECK-NEXT: mov d0, v1.d[1] -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: fcvtzu w12, d1 -; CHECK-NEXT: mov d1, v2.d[1] -; CHECK-NEXT: fcvtzu w9, d16 -; CHECK-NEXT: fcvtzu w11, d0 -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: csel w10, w11, w8, lo -; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: csel w11, w12, w8, lo -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d2 -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: mov w12, v0.s[1] -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: mov d2, v4.d[1] -; CHECK-NEXT: mov v0.b[1], w12 -; CHECK-NEXT: fmov w13, s1 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d3 -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v0.b[2], w13 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w10, w8, lo -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: mov d2, v5.d[1] -; CHECK-NEXT: mov v0.b[3], w12 -; CHECK-NEXT: fmov w12, s1 -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w13, v1.s[1] -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d4 -; CHECK-NEXT: mov v0.b[4], w12 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w10, w8, lo -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w10, w11, w8, lo -; CHECK-NEXT: mov v0.b[5], w13 -; CHECK-NEXT: fcvtzu w13, d2 -; CHECK-NEXT: fmov w11, s1 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: fcvtzu w10, d5 -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: mov v0.b[6], w11 -; CHECK-NEXT: mov d2, v6.d[1] -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w13, w8, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: fcvtzu w13, d6 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: mov v0.b[7], w12 -; CHECK-NEXT: fcvtzu w12, d2 -; CHECK-NEXT: fmov w11, s1 -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: mov w10, v1.s[1] -; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: mov d1, v7.d[1] -; CHECK-NEXT: mov v0.b[8], w11 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: csel w9, w12, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w11, w13, w8, lo -; CHECK-NEXT: fcvtzu w13, d7 -; CHECK-NEXT: mov v0.b[9], w10 -; CHECK-NEXT: fmov w10, s2 -; CHECK-NEXT: fmov s3, w11 -; CHECK-NEXT: fcvtzu w11, d1 -; CHECK-NEXT: mov w12, v2.s[1] -; CHECK-NEXT: mov v0.b[10], w10 -; CHECK-NEXT: mov v3.s[1], w9 -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w9, w11, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: mov v0.b[11], w12 -; CHECK-NEXT: fmov w10, s3 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, v3.s[1] -; CHECK-NEXT: mov v0.b[12], w10 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: mov v0.b[13], w8 +; CHECK-NEXT: movi v16.2d, 
#0000000000000000 +; CHECK-NEXT: mov x8, #246290604621824 +; CHECK-NEXT: movk x8, #16495, lsl #48 +; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v16.2d +; CHECK-NEXT: dup v17.2d, x8 +; CHECK-NEXT: fmaxnm v1.2d, v1.2d, v16.2d +; CHECK-NEXT: fmaxnm v2.2d, v2.2d, v16.2d +; CHECK-NEXT: fminnm v0.2d, v0.2d, v17.2d +; CHECK-NEXT: fminnm v1.2d, v1.2d, v17.2d +; CHECK-NEXT: fminnm v2.2d, v2.2d, v17.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: mov v0.b[1], w8 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-NEXT: fmaxnm v2.2d, v3.2d, v16.2d +; CHECK-NEXT: fmaxnm v3.2d, v4.2d, v16.2d +; CHECK-NEXT: mov v0.b[2], w8 +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: fminnm v2.2d, v2.2d, v17.2d +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov v0.b[3], w9 +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-NEXT: fminnm v2.2d, v3.2d, v17.2d +; CHECK-NEXT: mov v0.b[4], w8 +; CHECK-NEXT: fmaxnm v3.2d, v6.2d, v16.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: mov v0.b[5], w9 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-NEXT: fmaxnm v2.2d, v5.2d, v16.2d +; CHECK-NEXT: mov v0.b[6], w8 +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: fminnm v2.2d, v2.2d, v17.2d +; CHECK-NEXT: mov v0.b[7], w9 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-NEXT: mov v0.b[8], w8 +; CHECK-NEXT: fminnm v2.2d, v3.2d, v17.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: mov v0.b[9], w9 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-NEXT: fmaxnm v2.2d, v7.2d, v16.2d +; CHECK-NEXT: mov v0.b[10], w8 +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: fminnm v2.2d, v2.2d, v17.2d +; CHECK-NEXT: mov v0.b[11], w9 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-NEXT: mov v0.b[12], w8 +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: mov v0.b[13], w9 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: mov v0.b[14], w8 @@ -2927,46 +2876,29 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { ; CHECK-LABEL: test_unsigned_v8f64_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v3.d[1] -; CHECK-NEXT: fcvtzu w10, d3 -; CHECK-NEXT: mov d3, v2.d[1] -; CHECK-NEXT: mov w8, #65535 -; CHECK-NEXT: fcvtzu w12, d2 -; CHECK-NEXT: fcvtzu w13, d1 -; CHECK-NEXT: fcvtzu w9, d4 -; CHECK-NEXT: mov d4, v1.d[1] -; CHECK-NEXT: fcvtzu w11, d3 -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w10, w8 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: csel w12, w12, w8, lo -; CHECK-NEXT: fmov s19, w10 -; CHECK-NEXT: fcvtzu w10, d4 -; CHECK-NEXT: cmp w10, w8 -; CHECK-NEXT: mov v19.s[1], w9 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: fmov s18, w12 -; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: csel w12, w13, w8, lo -; CHECK-NEXT: fcvtzu w13, d0 -; CHECK-NEXT: mov v18.s[1], w11 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: fmov s17, w12 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: mov v17.s[1], w10 -; CHECK-NEXT: fmov s16, w8 +; CHECK-NEXT: movi v4.2d, #0000000000000000 +; CHECK-NEXT: mov x8, 
#281337537757184 +; CHECK-NEXT: movk x8, #16623, lsl #48 +; CHECK-NEXT: fmaxnm v3.2d, v3.2d, v4.2d +; CHECK-NEXT: dup v5.2d, x8 +; CHECK-NEXT: fmaxnm v2.2d, v2.2d, v4.2d ; CHECK-NEXT: adrp x8, .LCPI84_0 -; CHECK-NEXT: mov v16.s[1], w9 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0] -; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b +; CHECK-NEXT: fmaxnm v1.2d, v1.2d, v4.2d +; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v4.2d +; CHECK-NEXT: fminnm v3.2d, v3.2d, v5.2d +; CHECK-NEXT: fminnm v2.2d, v2.2d, v5.2d +; CHECK-NEXT: fminnm v1.2d, v1.2d, v5.2d +; CHECK-NEXT: fminnm v0.2d, v0.2d, v5.2d +; CHECK-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: xtn v6.2s, v3.2d +; CHECK-NEXT: xtn v5.2s, v2.2d +; CHECK-NEXT: xtn v4.2s, v1.2d +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI84_0] +; CHECK-NEXT: xtn v3.2s, v0.2d +; CHECK-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v1.16b ; CHECK-NEXT: ret %x = call <8 x i16> @llvm.fptoui.sat.v8f64.v8i16(<8 x double> %f) ret <8 x i16> %x @@ -2975,81 +2907,44 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) { ; CHECK-LABEL: test_unsigned_v16f64_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v3.d[1] -; CHECK-NEXT: fcvtzu w9, d3 -; CHECK-NEXT: mov d3, v2.d[1] -; CHECK-NEXT: mov w8, #65535 -; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: mov d2, v1.d[1] -; CHECK-NEXT: fcvtzu w11, d1 -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzu w12, d16 -; CHECK-NEXT: fcvtzu w13, d0 -; CHECK-NEXT: fcvtzu w14, d3 -; CHECK-NEXT: mov d0, v7.d[1] -; CHECK-NEXT: fcvtzu w15, d2 -; CHECK-NEXT: fcvtzu w17, d6 -; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: fcvtzu w16, d1 -; CHECK-NEXT: csel w12, w12, w8, lo -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: csel w14, w14, w8, lo -; CHECK-NEXT: cmp w10, w8 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: fmov s19, w9 -; CHECK-NEXT: csel w9, w15, w8, lo -; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: fcvtzu w15, d0 -; CHECK-NEXT: mov d0, v6.d[1] -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: mov v19.s[1], w12 -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fcvtzu w12, d7 -; CHECK-NEXT: fmov s18, w10 -; CHECK-NEXT: csel w10, w16, w8, lo -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: fcvtzu w16, d0 -; CHECK-NEXT: csel w13, w13, w8, lo -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: csel w15, w15, w8, lo -; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: mov d0, v5.d[1] -; CHECK-NEXT: csel w12, w12, w8, lo -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: mov v18.s[1], w14 -; CHECK-NEXT: fmov s23, w12 -; CHECK-NEXT: csel w12, w16, w8, lo -; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: fcvtzu w16, d0 -; CHECK-NEXT: mov d0, v4.d[1] -; CHECK-NEXT: csel w14, w17, w8, lo -; CHECK-NEXT: fcvtzu w17, d5 -; CHECK-NEXT: fmov s17, w11 -; CHECK-NEXT: mov v23.s[1], w15 -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fmov s22, w14 -; CHECK-NEXT: csel w14, w16, w8, lo -; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: fcvtzu w16, d0 -; CHECK-NEXT: csel w15, w17, w8, lo -; CHECK-NEXT: fcvtzu w11, d4 -; CHECK-NEXT: mov v22.s[1], w12 -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fmov s21, w15 -; CHECK-NEXT: csel w12, w16, w8, lo -; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: csel w8, w11, w8, lo -; CHECK-NEXT: mov v17.s[1], w9 -; CHECK-NEXT: adrp x9, .LCPI85_0 -; CHECK-NEXT: mov v21.s[1], w14 -; CHECK-NEXT: fmov s16, w13 -; CHECK-NEXT: fmov s20, w8 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI85_0] -; CHECK-NEXT: mov 
-; CHECK-NEXT: mov v20.s[1], w12
+; CHECK-NEXT: movi v16.2d, #0000000000000000
+; CHECK-NEXT: mov x8, #281337537757184
+; CHECK-NEXT: movk x8, #16623, lsl #48
+; CHECK-NEXT: fmaxnm v3.2d, v3.2d, v16.2d
+; CHECK-NEXT: dup v17.2d, x8
+; CHECK-NEXT: fmaxnm v2.2d, v2.2d, v16.2d
+; CHECK-NEXT: adrp x8, .LCPI85_0
+; CHECK-NEXT: fmaxnm v7.2d, v7.2d, v16.2d
+; CHECK-NEXT: fmaxnm v6.2d, v6.2d, v16.2d
+; CHECK-NEXT: fminnm v3.2d, v3.2d, v17.2d
+; CHECK-NEXT: fmaxnm v1.2d, v1.2d, v16.2d
+; CHECK-NEXT: fmaxnm v5.2d, v5.2d, v16.2d
+; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v16.2d
+; CHECK-NEXT: fminnm v2.2d, v2.2d, v17.2d
+; CHECK-NEXT: fmaxnm v4.2d, v4.2d, v16.2d
+; CHECK-NEXT: fminnm v7.2d, v7.2d, v17.2d
+; CHECK-NEXT: fcvtzu v3.2d, v3.2d
+; CHECK-NEXT: fminnm v6.2d, v6.2d, v17.2d
+; CHECK-NEXT: fcvtzu v2.2d, v2.2d
+; CHECK-NEXT: fminnm v1.2d, v1.2d, v17.2d
+; CHECK-NEXT: fminnm v5.2d, v5.2d, v17.2d
+; CHECK-NEXT: fminnm v0.2d, v0.2d, v17.2d
+; CHECK-NEXT: fminnm v4.2d, v4.2d, v17.2d
+; CHECK-NEXT: fcvtzu v7.2d, v7.2d
+; CHECK-NEXT: xtn v19.2s, v3.2d
+; CHECK-NEXT: fcvtzu v3.2d, v6.2d
+; CHECK-NEXT: fcvtzu v1.2d, v1.2d
+; CHECK-NEXT: fcvtzu v5.2d, v5.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: xtn v18.2s, v2.2d
+; CHECK-NEXT: fcvtzu v2.2d, v4.2d
+; CHECK-NEXT: xtn v23.2s, v7.2d
+; CHECK-NEXT: xtn v22.2s, v3.2d
+; CHECK-NEXT: xtn v17.2s, v1.2d
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI85_0]
+; CHECK-NEXT: xtn v21.2s, v5.2d
+; CHECK-NEXT: xtn v16.2s, v0.2d
+; CHECK-NEXT: xtn v20.2s, v2.2d
 ; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
 ; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
 ; CHECK-NEXT: ret
Index: llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -0,0 +1,720 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
+
+; Float
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)
+
+define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-822083584
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z2.d, #0xffffffff80000000
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: mov w8, #1325400063
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: mov z2.d, #0x7fffffff
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
+  ret <vscale x 2 x i32> %x
+}
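(Editorial note, not part of the patch: the predicate sequence above — fcmge for the lower bound, fcmgt for the upper, fcmuo for NaN — implements the LangRef semantics of llvm.fptosi.sat: out-of-range inputs clamp to the extremes of the integer type and NaN yields 0. A minimal scalar form for reference, illustrative only:

define i32 @fptosi_sat_example(float %f) {
  ; saturating conversion: clamps to [-2^31, 2^31-1], NaN becomes 0
  %r = call i32 @llvm.fptosi.sat.i32.f32(float %f)
  ret i32 %r
}
declare i32 @llvm.fptosi.sat.i32.f32(float)
)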
+
+define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-822083584
+; CHECK-NEXT: mov w9, #-2147483648
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: mov w8, #1325400063
+; CHECK-NEXT: mov z2.s, w9
+; CHECK-NEXT: mov w9, #2147483647
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z3.s, w8
+; CHECK-NEXT: mov z1.s, p1/m, z2.s
+; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z3.s
+; CHECK-NEXT: mov z2.s, w9
+; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: mov z1.s, p1/m, z2.s
+; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
+  ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-822083584
+; CHECK-NEXT: mov w9, #-2147483648
+; CHECK-NEXT: mov w10, #1325400063
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z5, z0
+; CHECK-NEXT: fcvtzs z5.s, p0/m, z0.s
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov w8, #2147483647
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z3.s, w9
+; CHECK-NEXT: mov z4.s, w10
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z5.s, p1/m, z3.s
+; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z4.s
+; CHECK-NEXT: mov z6.s, w8
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fcvtzs z2.s, p0/m, z1.s
+; CHECK-NEXT: sel z3.s, p2, z3.s, z2.s
+; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z4.s
+; CHECK-NEXT: sel z2.s, p1, z6.s, z5.s
+; CHECK-NEXT: mov z3.s, p2/m, z6.s
+; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s
+; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z3.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z1.d, z3.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
+  ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-956301312
+; CHECK-NEXT: mov w9, #65024
+; CHECK-NEXT: movk w9, #18175, lsl #16
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: mov w8, #32767
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z2.s, w9
+; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: mov z1.s, p1/m, z2.s
+; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
+  ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-956301312
+; CHECK-NEXT: mov w9, #65024
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movk w9, #18175, lsl #16
+; CHECK-NEXT: movprfx z4, z1
+; CHECK-NEXT: fcvtzs z4.s, p0/m, z1.s
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov w8, #32767
+; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z3.s, w9
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z3.s
+; CHECK-NEXT: mov z4.s, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: fcvtzs z2.s, p0/m, z0.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z5.s, w8
+; CHECK-NEXT: mov z2.s, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z3.s
+; CHECK-NEXT: sel z3.s, p2, z5.s, z4.s
+; CHECK-NEXT: mov z2.s, p1/m, z5.s
+; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s
+; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.h, z2.h, z3.h
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
+  ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-553648128
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z2.d, #0x8000000000000000
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: mov w8, #1593835519
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
+  ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-553648128
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z3.d, z0.s
+; CHECK-NEXT: mov w9, #1593835519
+; CHECK-NEXT: mov z2.d, #0x8000000000000000
+; CHECK-NEXT: uunpkhi z5.d, z0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: movprfx z0, z3
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.s
+; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z1.s
+; CHECK-NEXT: mov z4.s, w9
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmgt p2.s, p0/z, z3.s, z4.s
+; CHECK-NEXT: mov z0.d, p1/m, z2.d
+; CHECK-NEXT: fcmge p1.s, p0/z, z5.s, z1.s
+; CHECK-NEXT: movprfx z1, z5
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: fcmgt p1.s, p0/z, z5.s, z4.s
+; CHECK-NEXT: mov z0.d, p2/m, z6.d
+; CHECK-NEXT: mov z1.d, p1/m, z6.d
+; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z3.s
+; CHECK-NEXT: fcmuo p0.s, p0/z, z5.s, z5.s
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
+  ret <vscale x 4 x i64> %x
+}
+
+; Double
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)
+
+define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
+; CHECK-LABEL: test_signed_v2f64_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-4476578029606273024
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z2.d, #0xffffffff80000000
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: mov x8, #281474972516352
+; CHECK-NEXT: movk x8, #16863, lsl #48
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: mov z2.d, #0x7fffffff
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
+  ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-4476578029606273024
+; CHECK-NEXT: mov x9, #281474972516352
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movk x9, #16863, lsl #48
+; CHECK-NEXT: mov z3.d, #0xffffffff80000000
+; CHECK-NEXT: movprfx z4, z1
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.d
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mov z6.d, #0x7fffffff
+; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z5.d, x9
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z5.d
+; CHECK-NEXT: mov z4.d, p1/m, z3.d
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z0.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z2.d, p1/m, z3.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z5.d
+; CHECK-NEXT: sel z3.d, p2, z6.d, z4.d
+; CHECK-NEXT: mov z2.d, p1/m, z6.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z2.s, z3.s
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
+  ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-4476578029606273024
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z5.d, #0xffffffff80000000
+; CHECK-NEXT: movprfx z6, z1
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z1.d
+; CHECK-NEXT: mov z24.d, #0x7fffffff
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: mov x8, #281474972516352
+; CHECK-NEXT: movk x8, #16863, lsl #48
+; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z4.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d
+; CHECK-NEXT: mov z6.d, p1/m, z5.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z7.d, x8
+; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z7.d
+; CHECK-NEXT: mov z6.d, p1/m, z24.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d
+; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z1.d, p2/m, z5.d
+; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z7.d
+; CHECK-NEXT: mov z1.d, p2/m, z24.d
+; CHECK-NEXT: fcmge p2.d, p0/z, z3.d, z4.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: movprfx z0, z3
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, p2/m, z5.d
+; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d
+; CHECK-NEXT: movprfx z4, z2
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z7.d
+; CHECK-NEXT: mov z4.d, p2/m, z5.d
+; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z7.d
+; CHECK-NEXT: sel z5.d, p1, z24.d, z0.d
+; CHECK-NEXT: mov z4.d, p2/m, z24.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z2.d, z2.d
+; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z1.s, z6.s
+; CHECK-NEXT: uzp1 z1.s, z4.s, z5.s
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
+  ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-4548635623644200960
+; CHECK-NEXT: mov x9, #281200098803712
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movk x9, #16607, lsl #48
+; CHECK-NEXT: movprfx z4, z1
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.d
+; CHECK-NEXT: mov z3.d, #32767 // =0x7fff
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z5.d, x9
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z5.d
+; CHECK-NEXT: mov z4.d, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z0.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z2.d, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z5.d
+; CHECK-NEXT: mov z4.d, p2/m, z3.d
+; CHECK-NEXT: mov z2.d, p1/m, z3.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: mov z4.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z2.s, z4.s
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
+  ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-4548635623644200960
+; CHECK-NEXT: mov x9, #281200098803712
+; CHECK-NEXT: movk x9, #16607, lsl #48
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z5, z3
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z3.d
+; CHECK-NEXT: mov z7.d, #32767 // =0x7fff
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, z4.d
+; CHECK-NEXT: mov z6.d, x9
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmgt p2.d, p0/z, z3.d, z6.d
+; CHECK-NEXT: mov z5.d, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: mov z5.d, p2/m, z7.d
+; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d
+; CHECK-NEXT: movprfx z3, z2
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z2.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z3.d, p2/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z6.d
+; CHECK-NEXT: mov z3.d, p2/m, z7.d
+; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z4.d
+; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmuo p1.d, p0/z, z2.d, z2.d
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z2.d, p2/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d
+; CHECK-NEXT: movprfx z4, z0
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z6.d
+; CHECK-NEXT: mov z4.d, p2/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z6.d
+; CHECK-NEXT: mov z2.d, p1/m, z7.d
+; CHECK-NEXT: mov z4.d, p2/m, z7.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z3.s, z5.s
+; CHECK-NEXT: uzp1 z1.s, z4.s, z2.s
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
+  ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
+; CHECK-LABEL: test_signed_v2f64_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-4332462841530417152
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z2.d, #0x8000000000000000
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: mov x8, #4890909195324358655
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
+  ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-4332462841530417152
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x9, #4890909195324358655
+; CHECK-NEXT: mov z3.d, #0x8000000000000000
+; CHECK-NEXT: movprfx z4, z0
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d
+; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: mov z5.d, x9
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z5.d
+; CHECK-NEXT: mov z4.d, p1/m, z3.d
+; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d
+; CHECK-NEXT: sel z3.d, p1, z3.d, z2.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z5.d
+; CHECK-NEXT: sel z2.d, p2, z6.d, z4.d
+; CHECK-NEXT: mov z3.d, p1/m, z6.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z1.d, z3.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
+  ret <vscale x 4 x i64> %x
+}
+
+
+; half
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f16.nxv2i32(<vscale x 2 x half>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f16.nxv4i32(<vscale x 4 x half>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f16.nxv8i32(<vscale x 8 x half>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f16.nxv4i16(<vscale x 4 x half>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f16.nxv8i16(<vscale x 8 x half>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
+
+define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
+; CHECK-LABEL: test_signed_v2f16_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI14_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z3.d, #0xffffffff80000000
+; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8]
+; CHECK-NEXT: adrp x8, .LCPI14_1
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_1
+; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8]
+; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.d, p1/m, z3.d
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: mov z2.d, #0x7fffffff
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
+  ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI15_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI15_0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: adrp x9, .LCPI15_1
+; CHECK-NEXT: add x9, x9, :lo12:.LCPI15_1
+; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8]
+; CHECK-NEXT: mov w8, #-2147483648
+; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x9]
+; CHECK-NEXT: mov z3.s, w8
+; CHECK-NEXT: mov w8, #2147483647
+; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.s, p1/m, z3.s
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
+; CHECK-NEXT: mov z1.s, p1/m, z2.s
+; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
+  ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
+; CHECK-LABEL: test_signed_v8f16_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI16_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI16_0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: adrp x9, .LCPI16_1
+; CHECK-NEXT: add x9, x9, :lo12:.LCPI16_1
+; CHECK-NEXT: uunpklo z2.s, z0.h
+; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8]
+; CHECK-NEXT: mov w8, #-2147483648
+; CHECK-NEXT: uunpkhi z6.s, z0.h
+; CHECK-NEXT: ld1rh { z3.s }, p0/z, [x9]
+; CHECK-NEXT: movprfx z4, z2
+; CHECK-NEXT: fcvtzs z4.s, p0/m, z2.h
+; CHECK-NEXT: mov z5.s, w8
+; CHECK-NEXT: mov w8, #2147483647
+; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT: fcmge p2.h, p0/z, z6.h, z1.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z4.s, p1/m, z5.s
+; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z7.s, w8
+; CHECK-NEXT: movprfx z0, z6
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z6.h
+; CHECK-NEXT: sel z1.s, p2, z5.s, z0.s
+; CHECK-NEXT: fcmgt p2.h, p0/z, z6.h, z3.h
+; CHECK-NEXT: sel z0.s, p1, z7.s, z4.s
+; CHECK-NEXT: mov z1.s, p2/m, z7.s
+; CHECK-NEXT: fcmuo p1.h, p0/z, z2.h, z2.h
+; CHECK-NEXT: fcmuo p0.h, p0/z, z6.h, z6.h
+; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
+  ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI17_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8]
+; CHECK-NEXT: adrp x8, .LCPI17_1
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_1
+; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8]
+; CHECK-NEXT: mov w8, #32767
+; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov z1.s, p1/m, z2.s
+; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
+; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
+  ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
+; CHECK-LABEL: test_signed_v8f16_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI18_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ld1rh { z1.h }, p0/z, [x8]
+; CHECK-NEXT: adrp x8, .LCPI18_1
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_1
+; CHECK-NEXT: ld1rh { z2.h }, p0/z, [x8]
+; CHECK-NEXT: mov w8, #32767
+; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.h, p0/m, z0.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.h, p1/m, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: mov z2.h, w8
+; CHECK-NEXT: mov z1.h, p1/m, z2.h
+; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
+; CHECK-NEXT: mov z1.h, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
+  ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
+; CHECK-LABEL: test_signed_v2f16_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI19_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z3.d, #0x8000000000000000
+; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8]
+; CHECK-NEXT: adrp x8, .LCPI19_1
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_1
+; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8]
+; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.d, p1/m, z3.d
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT: mov z1.d, p1/m, z2.d
+; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
+  ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI20_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z4.d, z0.s
+; CHECK-NEXT: mov z3.d, #0x8000000000000000
+; CHECK-NEXT: uunpkhi z5.d, z0.s
+; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8]
+; CHECK-NEXT: adrp x8, .LCPI20_1
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_1
+; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff
+; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8]
+; CHECK-NEXT: fcmge p1.h, p0/z, z4.h, z1.h
+; CHECK-NEXT: movprfx z0, z4
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z0.d, p1/m, z3.d
+; CHECK-NEXT: fcmge p1.h, p0/z, z5.h, z1.h
+; CHECK-NEXT: movprfx z1, z5
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmgt p2.h, p0/z, z4.h, z2.h
+; CHECK-NEXT: mov z1.d, p1/m, z3.d
+; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z2.h
+; CHECK-NEXT: mov z0.d, p2/m, z6.d
+; CHECK-NEXT: mov z1.d, p1/m, z6.d
+; CHECK-NEXT: fcmuo p1.h, p0/z, z4.h, z4.h
+; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
+  ret <vscale x 4 x i64> %x
+}
+
Index: llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
@@ -0,0 +1,556 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
+
+; Float
+
+declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
+declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
+declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
+declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
+declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
+declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
+declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)
+
+define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1333788671
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.d, #0xffffffff
+; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, p2/m, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
+  ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1333788671
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
+  ret <vscale x 4 x i32> %x
+}
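(Editorial note, not part of the patch: unlike the signed tests, the unsigned sequences need no fcmuo/NaN fixup. NaN fails the `fcmge >= 0.0` check here, and in the NEON tests earlier `fmaxnm` against zero maps NaN to 0, so the zero lower clamp handles NaN for free. A hedged scalar sketch of the clamp-then-convert shape, with a hypothetical function name and the f64 -> i16 bounds:

define i16 @fptoui_sat_sketch(double %f) {
  ; maxnum(NaN, 0.0) returns 0.0, so NaN is absorbed by the lower clamp
  %lo = call double @llvm.maxnum.f64(double %f, double 0.0)
  ; 65535.0 is exactly representable in f64, so the upper clamp is exact
  %hi = call double @llvm.minnum.f64(double %lo, double 65535.0)
  %r = fptoui double %hi to i16
  ret i16 %r
}
declare double @llvm.maxnum.f64(double, double)
declare double @llvm.minnum.f64(double, double)
)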
+
+define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1333788671
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z4.s, w8
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z4.s
+; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z4.s
+; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z2.s, p1/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z3.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z1.d, z3.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
+  ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #65280
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movk w8, #18303, lsl #16
+; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.s, w9
+; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.s, p2/m, z1.s
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
+  ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #65280
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movk w8, #18303, lsl #16
+; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov w8, #65535
+; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: mov z0.s, w8
+; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
+; CHECK-NEXT: sel z2.s, p2, z0.s, z3.s
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
+  ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1602224127
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
+  ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1602224127
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z1.d, z0.s
+; CHECK-NEXT: uunpkhi z3.d, z0.s
+; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
+; CHECK-NEXT: fcmge p2.s, p0/z, z3.s, #0.0
+; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: movprfx z1, z3
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.s
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: fcmgt p0.s, p0/z, z3.s, z2.s
+; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
+  ret <vscale x 4 x i64> %x
+}
+
+; Double
+
+declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
+declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
+declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
+declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
+declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
+declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
+declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)
+
+define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
+; CHECK-LABEL: test_signed_v2f64_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #281474974613504
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movk x8, #16879, lsl #48
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.d, #0xffffffff
+; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, p2/m, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
+  ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #281474974613504
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movk x8, #16879, lsl #48
+; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d
+; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: mov z0.d, #0xffffffff
+; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT: sel z2.d, p2, z0.d, z3.d
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: uzp1 z0.s, z0.s, z2.s
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
+  ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #281474974613504
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movk x8, #16879, lsl #48
+; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
+; CHECK-NEXT: movprfx z5, z1
+; CHECK-NEXT: fcvtzu z5.d, p0/m, z1.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: movprfx z6, z0
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z0.d
+; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z4.d
+; CHECK-NEXT: mov z1.d, #0xffffffff
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z4.d
+; CHECK-NEXT: sel z0.d, p2, z1.d, z5.d
+; CHECK-NEXT: fcmge p2.d, p0/z, z3.d, #0.0
+; CHECK-NEXT: sel z5.d, p1, z1.d, z6.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z4.d
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0
+; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0
+; CHECK-NEXT: movprfx z6, z2
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z2.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: fcmgt p0.d, p0/z, z2.d, z4.d
+; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0
+; CHECK-NEXT: sel z2.d, p1, z1.d, z3.d
+; CHECK-NEXT: sel z1.d, p0, z1.d, z6.d
+; CHECK-NEXT: uzp1 z0.s, z5.s, z0.s
+; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
+  ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #281337537757184
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movk x8, #16623, lsl #48
+; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d
+; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: mov z0.d, #65535 // =0xffff
+; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT: sel z2.d, p2, z0.d, z3.d
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: uzp1 z0.s, z0.s, z2.s
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
+  ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #281337537757184
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movk x8, #16623, lsl #48
+; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0
+; CHECK-NEXT: movprfx z5, z3
+; CHECK-NEXT: fcvtzu z5.d, p0/m, z3.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmge p1.d, p0/z, z2.d, #0.0
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: movprfx z6, z2
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z2.d
+; CHECK-NEXT: fcmgt p2.d, p0/z, z3.d, z4.d
+; CHECK-NEXT: mov z3.d, #65535 // =0xffff
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p1.d, p0/z, z2.d, z4.d
+; CHECK-NEXT: sel z2.d, p2, z3.d, z5.d
+; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0
+; CHECK-NEXT: sel z5.d, p1, z3.d, z6.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0
+; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: movprfx z6, z0
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z0.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d
+; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0
+; CHECK-NEXT: sel z0.d, p1, z3.d, z1.d
+; CHECK-NEXT: sel z1.d, p0, z3.d, z6.d
+; CHECK-NEXT: uzp1 z2.s, z5.s, z2.s
+; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
+  ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
+; CHECK-LABEL: test_signed_v2f64_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #4895412794951729151
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
+  ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #4895412794951729151
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z4.d
+; CHECK-NEXT: fcmgt p0.d, p0/z, z1.d, z4.d
+; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z2.d, p1/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z3.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z1.d, z3.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
+  ret <vscale x 4 x i64> %x
+}
+
+
+; half
+
+declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half>)
+declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half>)
+declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half>)
+declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half>)
+declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half>)
+declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half>)
+declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
+
+define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
+; CHECK-LABEL: test_signed_v2f16_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI14_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z0.d, #0xffffffff
+; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
+  ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI15_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI15_0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8]
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
+  ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
+; CHECK-LABEL: test_signed_v8f16_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI16_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI16_0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: uunpklo z3.s, z0.h
+; CHECK-NEXT: uunpkhi z4.s, z0.h
+; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, #0.0
+; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8]
+; CHECK-NEXT: fcmge p2.h, p0/z, z4.h, #0.0
+; CHECK-NEXT: movprfx z0, z3
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z3.h
+; CHECK-NEXT: movprfx z1, z4
+; CHECK-NEXT: fcvtzu z1.s, p0/m, z4.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z1.s, p2/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p1.h, p0/z, z3.h, z2.h
+; CHECK-NEXT: fcmgt p0.h, p0/z, z4.h, z2.h
+; CHECK-NEXT: mov z0.s, p1/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
+  ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI17_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8]
+; CHECK-NEXT: mov w8, #65535
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z0.s, w8
+; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
+  ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
+; CHECK-LABEL: test_signed_v8f16_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI18_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: ld1rh { z2.h }, p0/z, [x8]
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzu z1.h, p0/m, z0.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: mov z1.h, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
+  ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
+; CHECK-LABEL: test_signed_v2f16_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI19_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8]
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
+  ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
+; CHECK-LABEL: test_signed_v4f16_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI20_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z3.d, z0.s
+; CHECK-NEXT: uunpkhi z4.d, z0.s
+; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, #0.0
+; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8]
+; CHECK-NEXT: fcmge p2.h, p0/z, z4.h, #0.0
+; CHECK-NEXT: movprfx z0, z3
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z3.h
+; CHECK-NEXT: movprfx z1, z4
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z4.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: not p2.b, p0/z, p2.b
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0
+; CHECK-NEXT: fcmgt p1.h, p0/z, z3.h, z2.h
+; CHECK-NEXT: fcmgt p0.h, p0/z, z4.h, z2.h
+; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
+  ret <vscale x 4 x i64> %x
+}
+
Index: llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
@@ -0,0 +1,952 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK64
+
+; Float
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)
+
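(Editorial note, not part of the patch: a reading aid for the RISC-V checks below. Where the result is narrower than the source (e.g. f32 -> i16), the lowering clamps with vfmax/vfmin and converts, but the NaN -> 0 rule still needs an explicit unordered self-compare (the vmfne.vv/vmerge.vim pair), because fmax against a finite bound absorbs NaN into the lower clamp value. A hedged scalar sketch of that shape, with a hypothetical function name:

define i16 @signed_sat_sketch(float %f) {
  ; clamp to [-32768.0, 32767.0]; both bounds are exactly representable in f32
  %lo = call float @llvm.maxnum.f32(float %f, float -32768.0)
  %hi = call float @llvm.minnum.f32(float %lo, float 32767.0)
  %i = fptosi float %hi to i16
  ; maxnum(NaN, x) returns x, so NaN would survive the clamp as -32768;
  ; the unordered self-compare remaps it to 0, as vmfne.vv + vmerge.vim do
  %nan = fcmp uno float %f, %f
  %r = select i1 %nan, i16 0, i16 %i
  ret i16 %r
}
declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minnum.f32(float, float)
)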
+define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
+; CHECK32-LABEL: test_signed_v2f32_v2i32:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: lui a0, %hi(.LCPI0_0)
+; CHECK32-NEXT: flw ft0, %lo(.LCPI0_0)(a0)
+; CHECK32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK32-NEXT: vmfge.vf v9, v8, ft0
+; CHECK32-NEXT: vmnot.m v0, v9
+; CHECK32-NEXT: lui a0, %hi(.LCPI0_1)
+; CHECK32-NEXT: flw ft0, %lo(.LCPI0_1)(a0)
+; CHECK32-NEXT: vfcvt.rtz.x.f.v v9, v8
+; CHECK32-NEXT: lui a0, 524288
+; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0
+; CHECK32-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT: addi a0, a0, -1
+; CHECK32-NEXT: vmfne.vv v8, v8, v8
+; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0
+; CHECK32-NEXT: vmv.v.v v0, v8
+; CHECK32-NEXT: vmerge.vim v8, v9, 0, v0
+; CHECK32-NEXT: ret
+;
+; CHECK64-LABEL: test_signed_v2f32_v2i32:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: lui a0, %hi(.LCPI0_0)
+; CHECK64-NEXT: flw ft0, %lo(.LCPI0_0)(a0)
+; CHECK64-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK64-NEXT: vmfge.vf v9, v8, ft0
+; CHECK64-NEXT: vmnot.m v0, v9
+; CHECK64-NEXT: vfcvt.rtz.x.f.v v9, v8
+; CHECK64-NEXT: lui a0, %hi(.LCPI0_1)
+; CHECK64-NEXT: flw ft0, %lo(.LCPI0_1)(a0)
+; CHECK64-NEXT: li a0, 1
+; CHECK64-NEXT: slli a0, a0, 31
+; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0
+; CHECK64-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT: lui a0, 524288
+; CHECK64-NEXT: addiw a0, a0, -1
+; CHECK64-NEXT: vmfne.vv v8, v8, v8
+; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0
+; CHECK64-NEXT: vmv.v.v v0, v8
+; CHECK64-NEXT: vmerge.vim v8, v9, 0, v0
+; CHECK64-NEXT: ret
+  %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
+  ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
+; CHECK32-LABEL: test_signed_v4f32_v4i32:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: lui a0, %hi(.LCPI1_0)
+; CHECK32-NEXT: flw ft0, %lo(.LCPI1_0)(a0)
+; CHECK32-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK32-NEXT: vmfge.vf v10, v8, ft0
+; CHECK32-NEXT: vmnot.m v0, v10
+; CHECK32-NEXT: lui a0, %hi(.LCPI1_1)
+; CHECK32-NEXT: flw ft0, %lo(.LCPI1_1)(a0)
+; CHECK32-NEXT: vfcvt.rtz.x.f.v v10, v8
+; CHECK32-NEXT: lui a0, 524288
+; CHECK32-NEXT: vmerge.vxm v12, v10, a0, v0
+; CHECK32-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT: addi a0, a0, -1
+; CHECK32-NEXT: vmfne.vv v10, v8, v8
+; CHECK32-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK32-NEXT: vmv1r.v v0, v10
+; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT: ret
+;
+; CHECK64-LABEL: test_signed_v4f32_v4i32:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: lui a0, %hi(.LCPI1_0)
+; CHECK64-NEXT: flw ft0, %lo(.LCPI1_0)(a0)
+; CHECK64-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK64-NEXT: vmfge.vf v10, v8, ft0
+; CHECK64-NEXT: vmnot.m v0, v10
+; CHECK64-NEXT: vfcvt.rtz.x.f.v v10, v8
+; CHECK64-NEXT: lui a0, %hi(.LCPI1_1)
+; CHECK64-NEXT: flw ft0, %lo(.LCPI1_1)(a0)
+; CHECK64-NEXT: li a0, 1
+; CHECK64-NEXT: slli a0, a0, 31
+; CHECK64-NEXT: vmerge.vxm v12, v10, a0, v0
+; CHECK64-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT: lui a0, 524288
+; CHECK64-NEXT: addiw a0, a0, -1
+; CHECK64-NEXT: vmfne.vv v10, v8, v8
+; CHECK64-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK64-NEXT: vmv1r.v v0, v10
+; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK64-NEXT: ret
+  %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
+  ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
+; CHECK32-LABEL: test_signed_v8f32_v8i32:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: lui a0, %hi(.LCPI2_0)
+; CHECK32-NEXT: flw ft0, %lo(.LCPI2_0)(a0)
+; CHECK32-NEXT: vsetvli a0, zero, e32, m4, ta, mu
+; CHECK32-NEXT: vmfge.vf v12, v8, ft0
+; CHECK32-NEXT: vmnot.m v0, v12
+; CHECK32-NEXT: lui a0, %hi(.LCPI2_1)
+; CHECK32-NEXT: flw ft0, %lo(.LCPI2_1)(a0)
+; CHECK32-NEXT: vfcvt.rtz.x.f.v v12, v8
+; CHECK32-NEXT: lui a0, 524288
+; CHECK32-NEXT: vmerge.vxm v16, v12, a0, v0
+; CHECK32-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT: addi a0, a0, -1
+; CHECK32-NEXT: vmfne.vv v12, v8, v8
+; CHECK32-NEXT: vmerge.vxm v8, v16, a0, v0
+; CHECK32-NEXT: vmv1r.v v0, v12
+; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT: ret
+;
+; CHECK64-LABEL: test_signed_v8f32_v8i32:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: lui a0, %hi(.LCPI2_0)
+; CHECK64-NEXT: flw ft0, %lo(.LCPI2_0)(a0)
+; CHECK64-NEXT: vsetvli a0, zero, e32, m4, ta, mu
+; CHECK64-NEXT: vmfge.vf v12, v8, ft0
+; CHECK64-NEXT: vmnot.m v0, v12
+; CHECK64-NEXT: vfcvt.rtz.x.f.v v12, v8
+; CHECK64-NEXT: lui a0, %hi(.LCPI2_1)
+; CHECK64-NEXT: flw ft0, %lo(.LCPI2_1)(a0)
+; CHECK64-NEXT: li a0, 1
+; CHECK64-NEXT: slli a0, a0, 31
+; CHECK64-NEXT: vmerge.vxm v16, v12, a0, v0
+; CHECK64-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT: lui a0, 524288
+; CHECK64-NEXT: addiw a0, a0, -1
+; CHECK64-NEXT: vmfne.vv v12, v8, v8
+; CHECK64-NEXT: vmerge.vxm v8, v16, a0, v0
+; CHECK64-NEXT: vmv1r.v v0, v12
+; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK64-NEXT: ret
+  %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
+  ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
+; CHECK-LABEL: test_signed_v4f32_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI3_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI3_1)
+; CHECK-NEXT: flw ft1, %lo(.LCPI3_1)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfmax.vf v10, v8, ft0
+; CHECK-NEXT: vfmin.vf v10, v10, ft1
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
+  ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
+; CHECK-LABEL: test_signed_v8f32_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI4_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
+; CHECK-NEXT: flw ft1, %lo(.LCPI4_1)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfmax.vf v12, v8, ft0
+; CHECK-NEXT: vfmin.vf v12, v12, ft1
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT: vmerge.vim v8, v16, 0, v0
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
+  ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
+; CHECK32-LABEL: test_signed_v2f32_v2i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: addi sp, sp, -16
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: lui a0, 524288
+; CHECK32-NEXT: sw a0, 12(sp)
+; CHECK32-NEXT: sw zero, 8(sp)
+; CHECK32-NEXT: li a1, -1
+; CHECK32-NEXT: sw a1, 8(sp)
+; CHECK32-NEXT: lui a1, %hi(.LCPI5_0)
+; CHECK32-NEXT: flw ft0, %lo(.LCPI5_0)(a1)
+; CHECK32-NEXT: addi a0, a0, -1
+; CHECK32-NEXT: sw a0, 12(sp)
+; CHECK32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK32-NEXT: vmfge.vf v9, v8, ft0
+; CHECK32-NEXT: addi a0, sp, 8
+; CHECK32-NEXT: vlse64.v v10, (a0), zero
+; CHECK32-NEXT: vmnot.m v0, v9
+; CHECK32-NEXT: vfwcvt.rtz.x.f.v v12, v8
+; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; CHECK32-NEXT: vmerge.vvm v10, v12, v10, v0
+; CHECK32-NEXT: lui a1, %hi(.LCPI5_1)
+; CHECK32-NEXT: flw ft0, %lo(.LCPI5_1)(a1)
+; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK32-NEXT: vlse64.v v12, (a0), zero
+; CHECK32-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; CHECK32-NEXT: vmerge.vvm v10, v10, v12, v0
+; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK32-NEXT: vmfne.vv v0, v8, v8
+; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; CHECK32-NEXT: vmerge.vim v8, v10, 0, v0
+; CHECK32-NEXT: addi sp, sp, 16
+; CHECK32-NEXT: ret
+;
+; CHECK64-LABEL: test_signed_v2f32_v2i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: lui a0, %hi(.LCPI5_0)
+; CHECK64-NEXT: flw ft0, %lo(.LCPI5_0)(a0)
+; CHECK64-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK64-NEXT: vmfge.vf v9, v8, ft0
+; CHECK64-NEXT: vmnot.m v0, v9
+; CHECK64-NEXT: vfwcvt.rtz.x.f.v v10, v8
+; CHECK64-NEXT: li a0, -1
+; CHECK64-NEXT: lui a1, %hi(.LCPI5_1)
+; CHECK64-NEXT: flw ft0, %lo(.LCPI5_1)(a1)
+; CHECK64-NEXT: slli a1, a0, 63
+; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; CHECK64-NEXT: vmerge.vxm v10, v10, a1, v0
+; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK64-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT: srli a0, a0, 1
+; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0
+; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK64-NEXT: vmfne.vv v0, v8, v8
+; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; CHECK64-NEXT: vmerge.vim v8, v10, 0, v0
+; CHECK64-NEXT: ret
+  %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
+  ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
+; CHECK32-LABEL: test_signed_v4f32_v4i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: addi sp, sp, -16
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: lui a0, 524288
+; CHECK32-NEXT: sw a0, 12(sp)
+; CHECK32-NEXT: sw zero, 8(sp)
+; CHECK32-NEXT: li a1, -1
+; CHECK32-NEXT: sw a1, 8(sp)
+; CHECK32-NEXT: lui a1, %hi(.LCPI6_0)
+; CHECK32-NEXT: flw ft0, %lo(.LCPI6_0)(a1)
+; CHECK32-NEXT: addi a0, a0, -1
+; CHECK32-NEXT: sw a0, 12(sp)
+; CHECK32-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK32-NEXT: vmfge.vf v10, v8, ft0
+; CHECK32-NEXT: addi a0, sp, 8
+; CHECK32-NEXT: vlse64.v v12, (a0), zero
+; CHECK32-NEXT: vmnot.m v0, v10
+; CHECK32-NEXT: vfwcvt.rtz.x.f.v v16, v8
+; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; CHECK32-NEXT: vmerge.vvm v12, v16, v12, v0
+; CHECK32-NEXT: lui a1, %hi(.LCPI6_1)
+; CHECK32-NEXT: flw ft0, %lo(.LCPI6_1)(a1)
+; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT: vlse64.v v16, (a0), zero
+; CHECK32-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; CHECK32-NEXT: vmerge.vvm v12, v12, v16, v0
+; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT: vmfne.vv v0, v8, v8
+; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; CHECK32-NEXT: vmerge.vim v8, v12, 0, v0
+; CHECK32-NEXT: addi sp, sp, 16
+; CHECK32-NEXT: ret
+;
+; CHECK64-LABEL: test_signed_v4f32_v4i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: lui a0, %hi(.LCPI6_0)
+; CHECK64-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
+; CHECK64-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK64-NEXT: vmfge.vf v10, v8, ft0
+; CHECK64-NEXT: vmnot.m v0, v10
+; CHECK64-NEXT: vfwcvt.rtz.x.f.v v12, v8
+; CHECK64-NEXT: li a0, -1
+; CHECK64-NEXT: lui a1, %hi(.LCPI6_1)
+; CHECK64-NEXT: flw ft0, %lo(.LCPI6_1)(a1)
+; CHECK64-NEXT: slli a1, a0, 63
+; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; CHECK64-NEXT: vmerge.vxm v12, v12, a1, v0
+; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK64-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT: srli a0, a0, 1
+; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0
+; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK64-NEXT: vmfne.vv v0, v8, v8
+; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; CHECK64-NEXT: vmerge.vim v8, v12, 0, v0
+; CHECK64-NEXT: ret
+  %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
+  ret <vscale x 4 x i64> %x
+}
+
+; Double
+
+declare <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
+declare <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
+declare <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
+declare <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
+declare <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
+declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
+declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)
+
+define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
+; CHECK-LABEL: test_signed_v2f64_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI7_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI7_1)
+; CHECK-NEXT: fld ft1, %lo(.LCPI7_1)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
+; CHECK-NEXT: vfmax.vf v10, v8, ft0
+; CHECK-NEXT: vfmin.vf v10, v10, ft1
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT: ret
+  %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
+  ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI8_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI8_1)
+; CHECK-NEXT: fld ft1, %lo(.LCPI8_1)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT: vfmax.vf v12, v8, ft0
+; CHECK-NEXT: vfmin.vf v12, v12, ft1
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vmerge.vim v8, v16, 0, v0
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
+  ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI9_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
+; CHECK-NEXT: fld ft1, %lo(.LCPI9_1)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu
+; CHECK-NEXT: vfmax.vf v16, v8, ft0
+; CHECK-NEXT: vfmin.vf v16, v16, ft1
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmerge.vim v8, v24, 0, v0
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
+  ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
+; CHECK-LABEL: test_signed_v4f64_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI10_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
+; CHECK-NEXT: fld ft1, %lo(.LCPI10_1)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT: vfmax.vf v12, v8, ft0
+; CHECK-NEXT: vfmin.vf v12, v12, ft1
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT: ret
+  %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
+  ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
+; CHECK-LABEL: test_signed_v8f64_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI11_1)
+; CHECK-NEXT: fld ft1, %lo(.LCPI11_1)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu
+; CHECK-NEXT: vfmax.vf v16, v8, ft0
+; CHECK-NEXT: vfmin.vf v16, v16, ft1
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT: vmerge.vim v8, v16, 0, v0
+; CHECK-NEXT: ret
+  %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
+  ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
+; CHECK32-LABEL: test_signed_v2f64_v2i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: addi sp, sp, -16
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: lui a0, 524288
+; CHECK32-NEXT: sw a0, 12(sp)
+; CHECK32-NEXT: sw zero, 8(sp)
+; CHECK32-NEXT: li a1, -1
+; CHECK32-NEXT: sw a1, 8(sp)
+; CHECK32-NEXT: lui a1, %hi(.LCPI12_0)
+; CHECK32-NEXT: fld ft0, %lo(.LCPI12_0)(a1)
+; CHECK32-NEXT: addi a0, a0, -1
+; CHECK32-NEXT: sw a0, 12(sp)
+; CHECK32-NEXT: vsetvli a0, zero, e64, m2, ta, mu
+; CHECK32-NEXT: vmfge.vf v10, v8, ft0
+; CHECK32-NEXT: vmnot.m v0, v10
+; CHECK32-NEXT: addi a0, sp, 8
+; CHECK32-NEXT: vlse64.v v10, (a0), zero
+; CHECK32-NEXT: lui a1, %hi(.LCPI12_1)
+; CHECK32-NEXT: fld ft0, %lo(.LCPI12_1)(a1)
+; CHECK32-NEXT: vfcvt.rtz.x.f.v v12, v8
+; CHECK32-NEXT: vlse64.v v14, (a0), zero
+; CHECK32-NEXT: vmerge.vvm v12, v12, v10, v0
+; CHECK32-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK32-NEXT: vmfne.vv v10, v8, v8
+; CHECK32-NEXT: vmerge.vvm v8, v12, v14, v0
+; CHECK32-NEXT: vmv1r.v v0, v10
+; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT: addi sp, sp, 16
+; CHECK32-NEXT: ret
+;
+; CHECK64-LABEL: test_signed_v2f64_v2i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: lui a0, %hi(.LCPI12_0)
+; CHECK64-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
+; CHECK64-NEXT: vsetvli a0, zero, e64, m2, ta, mu
+; CHECK64-NEXT: vmfge.vf v10, v8, ft0
+; CHECK64-NEXT: vmnot.m v0, v10
+; CHECK64-NEXT: vfcvt.rtz.x.f.v v10, v8
+; CHECK64-NEXT: lui a0, %hi(.LCPI12_1)
+; CHECK64-NEXT: fld ft0, %lo(.LCPI12_1)(a0)
+; CHECK64-NEXT: li a0, -1
+; CHECK64-NEXT: slli a1, a0, 63
+; CHECK64-NEXT: vmerge.vxm v12, v10, a1, v0
+; CHECK64-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK64-NEXT: srli a0, a0, 1
+; CHECK64-NEXT: vmfne.vv v10, v8, v8
+; CHECK64-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK64-NEXT: vmv1r.v v0, v10
+; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK64-NEXT: ret
+  %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
+  ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
+; CHECK32-LABEL: test_signed_v4f64_v4i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: addi sp, sp, -16
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: lui a0, 524288
+; CHECK32-NEXT: sw a0, 12(sp)
+; CHECK32-NEXT: sw zero, 8(sp)
+; CHECK32-NEXT: li a1, -1
+; CHECK32-NEXT: sw a1, 8(sp)
+; CHECK32-NEXT: lui a1, %hi(.LCPI13_0)
+; CHECK32-NEXT: fld ft0, %lo(.LCPI13_0)(a1)
+; CHECK32-NEXT: addi
a0, a0, -1 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmfge.vf v12, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v12 +; CHECK32-NEXT: addi a0, sp, 8 +; CHECK32-NEXT: vlse64.v v12, (a0), zero +; CHECK32-NEXT: lui a1, %hi(.LCPI13_1) +; CHECK32-NEXT: fld ft0, %lo(.LCPI13_1)(a1) +; CHECK32-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK32-NEXT: vlse64.v v20, (a0), zero +; CHECK32-NEXT: vmerge.vvm v16, v16, v12, v0 +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: vmfne.vv v12, v8, v8 +; CHECK32-NEXT: vmerge.vvm v8, v16, v20, v0 +; CHECK32-NEXT: vmv1r.v v0, v12 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f64_v4i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmfge.vf v12, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v12 +; CHECK64-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI13_1) +; CHECK64-NEXT: fld ft0, %lo(.LCPI13_1)(a0) +; CHECK64-NEXT: li a0, -1 +; CHECK64-NEXT: slli a1, a0, 63 +; CHECK64-NEXT: vmerge.vxm v16, v12, a1, v0 +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: srli a0, a0, 1 +; CHECK64-NEXT: vmfne.vv v12, v8, v8 +; CHECK64-NEXT: vmerge.vxm v8, v16, a0, v0 +; CHECK64-NEXT: vmv1r.v v0, v12 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f64.nxv4i64( %f) + ret %x +} + + +; half + +declare @llvm.fptosi.sat.nxv2f16.nxv2i32() +declare @llvm.fptosi.sat.nxv4f16.nxv4i32() +declare @llvm.fptosi.sat.nxv8f16.nxv8i32() +declare @llvm.fptosi.sat.nxv4f16.nxv4i16() +declare @llvm.fptosi.sat.nxv8f16.nxv8i16() +declare @llvm.fptosi.sat.nxv2f16.nxv2i64() +declare @llvm.fptosi.sat.nxv4f16.nxv4i64() + +define @test_signed_v2f16_v2i32( %f) { +; CHECK32-LABEL: test_signed_v2f16_v2i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI14_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: lui a1, %hi(.LCPI14_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI14_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f16_v2i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI14_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI14_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI14_1)(a0) +; CHECK64-NEXT: li a0, 1 +; CHECK64-NEXT: slli a0, a0, 31 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: lui a0, 524288 +; CHECK64-NEXT: addiw a0, a0, -1 +; 
CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f16.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i32( %f) { +; CHECK32-LABEL: test_signed_v4f16_v4i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI15_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI15_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: lui a1, %hi(.LCPI15_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI15_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f16_v4i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI15_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI15_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI15_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI15_1)(a0) +; CHECK64-NEXT: li a0, 1 +; CHECK64-NEXT: slli a0, a0, 31 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: lui a0, 524288 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f16.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f16_v8i32( %f) { +; CHECK32-LABEL: test_signed_v8f16_v8i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI16_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK32-NEXT: vmfge.vf v10, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v10 +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: lui a1, %hi(.LCPI16_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI16_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK32-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK32-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v8f16_v8i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI16_0) +; 
CHECK64-NEXT: flh ft0, %lo(.LCPI16_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK64-NEXT: vmfge.vf v10, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v10 +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI16_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI16_1)(a0) +; CHECK64-NEXT: li a0, 1 +; CHECK64-NEXT: slli a0, a0, 31 +; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: lui a0, 524288 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f16.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i16( %f) { +; CHECK32-LABEL: test_signed_v4f16_v4i16: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI17_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: lui a0, %hi(.LCPI17_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI17_1)(a0) +; CHECK32-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK32-NEXT: lui a0, 8 +; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vmfne.vv v8, v8, v8 +; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK32-NEXT: vmv.v.v v0, v8 +; CHECK32-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f16_v4i16: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI17_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: lui a0, %hi(.LCPI17_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI17_1)(a0) +; CHECK64-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK64-NEXT: lui a0, 8 +; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vmfne.vv v8, v8, v8 +; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK64-NEXT: vmv.v.v v0, v8 +; CHECK64-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f16.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f16_v8i16( %f) { +; CHECK32-LABEL: test_signed_v8f16_v8i16: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI18_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK32-NEXT: vmfge.vf v10, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v10 +; CHECK32-NEXT: lui a0, %hi(.LCPI18_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI18_1)(a0) +; CHECK32-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK32-NEXT: lui a0, 8 +; CHECK32-NEXT: vmerge.vxm v12, v10, a0, v0 +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vmfne.vv v10, v8, v8 +; CHECK32-NEXT: vmerge.vxm v8, v12, a0, v0 +; CHECK32-NEXT: vmv1r.v v0, v10 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v8f16_v8i16: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI18_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK64-NEXT: vmfge.vf v10, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v10 +; 
CHECK64-NEXT: lui a0, %hi(.LCPI18_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI18_1)(a0) +; CHECK64-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK64-NEXT: lui a0, 8 +; CHECK64-NEXT: vmerge.vxm v12, v10, a0, v0 +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vmfne.vv v10, v8, v8 +; CHECK64-NEXT: vmerge.vxm v8, v12, a0, v0 +; CHECK64-NEXT: vmv1r.v v0, v10 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f16.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f16_v2i64( %f) { +; CHECK32-LABEL: test_signed_v2f16_v2i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: sw zero, 8(sp) +; CHECK32-NEXT: li a1, -1 +; CHECK32-NEXT: sw a1, 8(sp) +; CHECK32-NEXT: lui a1, %hi(.LCPI19_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI19_0)(a1) +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: addi a0, sp, 8 +; CHECK32-NEXT: vlse64.v v10, (a0), zero +; CHECK32-NEXT: vfwcvt.f.f.v v9, v8 +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v12, v9 +; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmerge.vvm v10, v12, v10, v0 +; CHECK32-NEXT: lui a1, %hi(.LCPI19_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI19_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vlse64.v v12, (a0), zero +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f16_v2i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI19_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: vfwcvt.f.f.v v9, v8 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v10, v9 +; CHECK64-NEXT: li a0, -1 +; CHECK64-NEXT: lui a1, %hi(.LCPI19_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI19_1)(a1) +; CHECK64-NEXT: slli a1, a0, 63 +; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmerge.vxm v10, v10, a1, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: srli a0, a0, 1 +; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f16.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f16_v4i64( %f) { +; CHECK32-LABEL: test_signed_v4f16_v4i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: sw zero, 8(sp) +; CHECK32-NEXT: li a1, -1 +; CHECK32-NEXT: sw a1, 8(sp) +; CHECK32-NEXT: lui a1, %hi(.LCPI20_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI20_0)(a1) +; CHECK32-NEXT: addi 
a0, a0, -1 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: addi a0, sp, 8 +; CHECK32-NEXT: vlse64.v v12, (a0), zero +; CHECK32-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v16, v10 +; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmerge.vvm v12, v16, v12, v0 +; CHECK32-NEXT: lui a1, %hi(.LCPI20_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI20_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK32-NEXT: vlse64.v v16, (a0), zero +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmerge.vvm v12, v12, v16, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f16_v4i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI20_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v12, v10 +; CHECK64-NEXT: li a0, -1 +; CHECK64-NEXT: lui a1, %hi(.LCPI20_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI20_1)(a1) +; CHECK64-NEXT: slli a1, a0, 63 +; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmerge.vxm v12, v12, a1, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: srli a0, a0, 1 +; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f16.nxv4i64( %f) + ret %x +} + Index: llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll @@ -0,0 +1,524 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK64 + +; Float + +declare @llvm.fptoui.sat.nxv2f32.nxv2i32() +declare @llvm.fptoui.sat.nxv4f32.nxv4i32() +declare @llvm.fptoui.sat.nxv8f32.nxv8i32() +declare @llvm.fptoui.sat.nxv4f32.nxv4i16() +declare @llvm.fptoui.sat.nxv8f32.nxv8i16() +declare @llvm.fptoui.sat.nxv2f32.nxv2i64() +declare @llvm.fptoui.sat.nxv4f32.nxv4i64() + +define @test_signed_v2f32_v2i32( %f) { +; CHECK-LABEL: test_signed_v2f32_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; 
CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f32.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f32_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: vmfge.vf v11, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v11 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f32.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f32_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f32_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vmfgt.vf v12, v8, ft0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: vmfge.vf v13, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v13 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f32.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f32_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: fmv.w.x ft1, zero +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, ft1 +; CHECK-NEXT: vfmin.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v10 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f32.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f32_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f32_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: fmv.w.x ft1, zero +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, ft1 +; CHECK-NEXT: vfmin.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v12 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f32.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f32_v2i64( %f) { +; CHECK-LABEL: test_signed_v2f32_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v10, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f32.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f32_v4i64( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI6_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: vmfge.vf v11, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v11 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vim v12, v12, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v10 
+; CHECK-NEXT: vmerge.vim v8, v12, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f32.nxv4i64( %f) + ret %x +} + +; Double + +declare @llvm.fptoui.sat.nxv2f64.nxv2i32() +declare @llvm.fptoui.sat.nxv4f64.nxv4i32() +declare @llvm.fptoui.sat.nxv8f64.nxv8i32() +declare @llvm.fptoui.sat.nxv4f64.nxv4i16() +declare @llvm.fptoui.sat.nxv8f64.nxv8i16() +declare @llvm.fptoui.sat.nxv2f64.nxv2i64() +declare @llvm.fptoui.sat.nxv4f64.nxv4i64() + +define @test_signed_v2f64_v2i32( %f) { +; CHECK32-LABEL: test_signed_v2f64_v2i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI7_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI7_0)(a0) +; CHECK32-NEXT: fcvt.d.w ft1, zero +; CHECK32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK32-NEXT: vfmax.vf v8, v8, ft1 +; CHECK32-NEXT: vfmin.vf v10, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vfncvt.rtz.xu.f.w v8, v10 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f64_v2i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI7_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI7_0)(a0) +; CHECK64-NEXT: fmv.d.x ft1, zero +; CHECK64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK64-NEXT: vfmax.vf v8, v8, ft1 +; CHECK64-NEXT: vfmin.vf v10, v8, ft0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vfncvt.rtz.xu.f.w v8, v10 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f64.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f64_v4i32( %f) { +; CHECK32-LABEL: test_signed_v4f64_v4i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI8_0)(a0) +; CHECK32-NEXT: fcvt.d.w ft1, zero +; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK32-NEXT: vfmax.vf v8, v8, ft1 +; CHECK32-NEXT: vfmin.vf v12, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vfncvt.rtz.xu.f.w v8, v12 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f64_v4i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI8_0)(a0) +; CHECK64-NEXT: fmv.d.x ft1, zero +; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK64-NEXT: vfmax.vf v8, v8, ft1 +; CHECK64-NEXT: vfmin.vf v12, v8, ft0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vfncvt.rtz.xu.f.w v8, v12 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f64.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f64_v8i32( %f) { +; CHECK32-LABEL: test_signed_v8f64_v8i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI9_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI9_0)(a0) +; CHECK32-NEXT: fcvt.d.w ft1, zero +; CHECK32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK32-NEXT: vfmax.vf v8, v8, ft1 +; CHECK32-NEXT: vfmin.vf v16, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK32-NEXT: vfncvt.rtz.xu.f.w v8, v16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v8f64_v8i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI9_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI9_0)(a0) +; CHECK64-NEXT: fmv.d.x ft1, zero +; CHECK64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK64-NEXT: vfmax.vf v8, v8, ft1 +; CHECK64-NEXT: vfmin.vf v16, v8, ft0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK64-NEXT: vfncvt.rtz.xu.f.w v8, v16 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f64.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f64_v4i16( %f) { +; CHECK32-LABEL: test_signed_v4f64_v4i16: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI10_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI10_0)(a0) +; CHECK32-NEXT: fcvt.d.w ft1, 
zero +; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK32-NEXT: vfmax.vf v8, v8, ft1 +; CHECK32-NEXT: vfmin.vf v8, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK32-NEXT: vncvt.x.x.w v8, v12 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f64_v4i16: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI10_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI10_0)(a0) +; CHECK64-NEXT: fmv.d.x ft1, zero +; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK64-NEXT: vfmax.vf v8, v8, ft1 +; CHECK64-NEXT: vfmin.vf v8, v8, ft0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK64-NEXT: vncvt.x.x.w v8, v12 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f64.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f64_v8i16( %f) { +; CHECK32-LABEL: test_signed_v8f64_v8i16: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; CHECK32-NEXT: fcvt.d.w ft1, zero +; CHECK32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK32-NEXT: vfmax.vf v8, v8, ft1 +; CHECK32-NEXT: vfmin.vf v8, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK32-NEXT: vfncvt.rtz.xu.f.w v16, v8 +; CHECK32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK32-NEXT: vncvt.x.x.w v8, v16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v8f64_v8i16: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; CHECK64-NEXT: fmv.d.x ft1, zero +; CHECK64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK64-NEXT: vfmax.vf v8, v8, ft1 +; CHECK64-NEXT: vfmin.vf v8, v8, ft0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK64-NEXT: vfncvt.rtz.xu.f.w v16, v8 +; CHECK64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK64-NEXT: vncvt.x.x.w v8, v16 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f64.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f64_v2i64( %f) { +; CHECK32-LABEL: test_signed_v2f64_v2i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI12_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK32-NEXT: fcvt.d.w ft0, zero +; CHECK32-NEXT: vmfge.vf v11, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v11 +; CHECK32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: vmv1r.v v0, v10 +; CHECK32-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f64_v2i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI12_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK64-NEXT: fmv.d.x ft0, zero +; CHECK64-NEXT: vmfge.vf v11, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v11 +; CHECK64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: vmv1r.v v0, v10 +; CHECK64-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f64.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f64_v4i64( %f) { +; CHECK32-LABEL: test_signed_v4f64_v4i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmfgt.vf v12, v8, ft0 +; CHECK32-NEXT: fcvt.d.w ft0, zero +; 
CHECK32-NEXT: vmfge.vf v13, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v13 +; CHECK32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: vmv1r.v v0, v12 +; CHECK32-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f64_v4i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmfgt.vf v12, v8, ft0 +; CHECK64-NEXT: fmv.d.x ft0, zero +; CHECK64-NEXT: vmfge.vf v13, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v13 +; CHECK64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: vmv1r.v v0, v12 +; CHECK64-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f64.nxv4i64( %f) + ret %x +} + + +; half + +declare @llvm.fptoui.sat.nxv2f16.nxv2i32() +declare @llvm.fptoui.sat.nxv4f16.nxv4i32() +declare @llvm.fptoui.sat.nxv8f16.nxv8i32() +declare @llvm.fptoui.sat.nxv4f16.nxv4i16() +declare @llvm.fptoui.sat.nxv8f16.nxv8i16() +declare @llvm.fptoui.sat.nxv2f16.nxv2i64() +declare @llvm.fptoui.sat.nxv4f16.nxv4i64() + +define @test_signed_v2f16_v2i32( %f) { +; CHECK-LABEL: test_signed_v2f16_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f16.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI15_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI15_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v10, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f16.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f16_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f16_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI16_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v11, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v11 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vim v12, v12, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v8, v12, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f16.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI17_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: 
vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f16.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f16_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f16_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI18_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v11, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v11 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f16.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f16_v2i64( %f) { +; CHECK-LABEL: test_signed_v2f16_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI19_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vim v10, v12, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v10, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f16.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f16_v4i64( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI20_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vim v12, v12, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v12, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f16.nxv4i64( %f) + ret %x +} + Index: llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -1522,61 +1522,105 @@ declare <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float>) define arm_aapcs_vfpcc <4 x i1> @test_signed_v4f32_v4i1(<4 x float> %f) { -; CHECK-LABEL: test_signed_v4f32_v4i1: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f32 s4, #-1.000000e+00 -; CHECK-NEXT: vldr s6, .LCPI22_0 -; CHECK-NEXT: vmaxnm.f32 s12, s0, s4 -; CHECK-NEXT: vmaxnm.f32 s8, s3, s4 -; CHECK-NEXT: vminnm.f32 s12, s12, s6 -; CHECK-NEXT: vmaxnm.f32 s10, s2, s4 -; CHECK-NEXT: vcvt.s32.f32 s12, s12 -; CHECK-NEXT: vmaxnm.f32 s4, s1, s4 -; CHECK-NEXT: vminnm.f32 s4, s4, s6 -; CHECK-NEXT: vminnm.f32 s10, s10, s6 -; CHECK-NEXT: vcvt.s32.f32 s4, s4 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: vminnm.f32 s8, s8, s6 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcvt.s32.f32 s10, s10 -; CHECK-NEXT: vcmp.f32 s1, s1 -; CHECK-NEXT: vcvt.s32.f32 s8, s8 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vcmp.f32 s2, s2 -; 
CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #0, #1 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vcmp.f32 s3, s3 -; CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #1, #1 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #2, #1 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: strb r1, [r0] -; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI22_0: -; CHECK-NEXT: .long 0x00000000 @ float 0 +; CHECK-MVE-LABEL: test_signed_v4f32_v4i1: +; CHECK-MVE: @ %bb.0: +; CHECK-MVE-NEXT: vmov.f32 s4, #-1.000000e+00 +; CHECK-MVE-NEXT: vldr s6, .LCPI22_0 +; CHECK-MVE-NEXT: vmaxnm.f32 s12, s0, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4 +; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s10, s2, s4 +; CHECK-MVE-NEXT: vcvt.s32.f32 s12, s12 +; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4 +; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6 +; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6 +; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, s0 +; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s10 +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 +; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s8 +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: vcmp.f32 s2, s2 +; CHECK-MVE-NEXT: rsb.w r2, r2, #0 +; CHECK-MVE-NEXT: bfi r1, r2, #0, #1 +; CHECK-MVE-NEXT: vmov r2, s4 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: vcmp.f32 s3, s3 +; CHECK-MVE-NEXT: rsb.w r2, r2, #0 +; CHECK-MVE-NEXT: bfi r1, r2, #1, #1 +; CHECK-MVE-NEXT: vmov r2, s10 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: rsb.w r2, r2, #0 +; CHECK-MVE-NEXT: bfi r1, r2, #2, #1 +; CHECK-MVE-NEXT: vmov r2, s8 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; CHECK-MVE-NEXT: bfi r1, r2, #3, #1 +; CHECK-MVE-NEXT: strb r1, [r0] +; CHECK-MVE-NEXT: bx lr +; CHECK-MVE-NEXT: .p2align 2 +; CHECK-MVE-NEXT: @ %bb.1: +; CHECK-MVE-NEXT: .LCPI22_0: +; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 +; +; CHECK-MVEFP-LABEL: test_signed_v4f32_v4i1: +; CHECK-MVEFP: @ %bb.0: +; CHECK-MVEFP-NEXT: vmov.f32 q1, #-1.000000e+00 +; CHECK-MVEFP-NEXT: vmov.i32 q2, #0x0 +; CHECK-MVEFP-NEXT: vmaxnm.f32 q1, q0, q1 +; CHECK-MVEFP-NEXT: vcmp.f32 s0, s0 +; CHECK-MVEFP-NEXT: vminnm.f32 q1, q1, q2 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: vcmp.f32 ne, q1, zr +; CHECK-MVEFP-NEXT: vcmp.f32 s1, s1 +; CHECK-MVEFP-NEXT: vmrs r2, p0 +; CHECK-MVEFP-NEXT: mov r1, r2 +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs r1, #0 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: and r1, r1, #1 +; CHECK-MVEFP-NEXT: vcmp.f32 s2, s2 +; CHECK-MVEFP-NEXT: rsb.w r3, r1, #0 +; CHECK-MVEFP-NEXT: mov.w r1, #0 +; CHECK-MVEFP-NEXT: bfi 
r1, r3, #0, #1 +; CHECK-MVEFP-NEXT: lsr.w r3, r2, #4 +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs r3, #0 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: and r3, r3, #1 +; CHECK-MVEFP-NEXT: vcmp.f32 s3, s3 +; CHECK-MVEFP-NEXT: rsb.w r3, r3, #0 +; CHECK-MVEFP-NEXT: bfi r1, r3, #1, #1 +; CHECK-MVEFP-NEXT: lsr.w r3, r2, #8 +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs r3, #0 +; CHECK-MVEFP-NEXT: lsrs r2, r2, #12 +; CHECK-MVEFP-NEXT: and r3, r3, #1 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs r2, #0 +; CHECK-MVEFP-NEXT: rsbs r3, r3, #0 +; CHECK-MVEFP-NEXT: and r2, r2, #1 +; CHECK-MVEFP-NEXT: bfi r1, r3, #2, #1 +; CHECK-MVEFP-NEXT: rsbs r2, r2, #0 +; CHECK-MVEFP-NEXT: bfi r1, r2, #3, #1 +; CHECK-MVEFP-NEXT: strb r1, [r0] +; CHECK-MVEFP-NEXT: bx lr %x = call <4 x i1> @llvm.fptosi.sat.v4f32.v4i1(<4 x float> %f) ret <4 x i1> %x } @@ -4438,116 +4482,197 @@ declare <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half>) define arm_aapcs_vfpcc <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i1: -; CHECK: @ %bb.0: -; CHECK-NEXT: .vsave {d8} -; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vcvtb.f32.f16 s15, s0 -; CHECK-NEXT: vmov.f32 s5, #-1.000000e+00 -; CHECK-NEXT: vldr s7, .LCPI42_0 -; CHECK-NEXT: vmaxnm.f32 s16, s15, s5 -; CHECK-NEXT: vcvtt.f32.f16 s12, s2 -; CHECK-NEXT: vcvtt.f32.f16 s9, s1 -; CHECK-NEXT: vminnm.f32 s16, s16, s7 -; CHECK-NEXT: vcvtt.f32.f16 s4, s3 -; CHECK-NEXT: vcvt.s32.f32 s16, s16 -; CHECK-NEXT: vcvtb.f32.f16 s8, s3 -; CHECK-NEXT: vcvtb.f32.f16 s2, s2 -; CHECK-NEXT: vcvtb.f32.f16 s1, s1 -; CHECK-NEXT: vcvtt.f32.f16 s0, s0 -; CHECK-NEXT: vmaxnm.f32 s6, s4, s5 -; CHECK-NEXT: vmaxnm.f32 s10, s8, s5 -; CHECK-NEXT: vmaxnm.f32 s14, s12, s5 -; CHECK-NEXT: vmaxnm.f32 s3, s2, s5 -; CHECK-NEXT: vmaxnm.f32 s11, s9, s5 -; CHECK-NEXT: vmaxnm.f32 s13, s1, s5 -; CHECK-NEXT: vmaxnm.f32 s5, s0, s5 -; CHECK-NEXT: vminnm.f32 s5, s5, s7 -; CHECK-NEXT: vminnm.f32 s13, s13, s7 -; CHECK-NEXT: vcvt.s32.f32 s5, s5 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vcmp.f32 s15, s15 -; CHECK-NEXT: vminnm.f32 s11, s11, s7 -; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s13, s13 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: bfi r1, r2, #0, #1 -; CHECK-NEXT: vcvt.s32.f32 s11, s11 -; CHECK-NEXT: vmov r2, s5 -; CHECK-NEXT: vminnm.f32 s3, s3, s7 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcmp.f32 s1, s1 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s3, s3 -; CHECK-NEXT: bfi r1, r2, #1, #1 -; CHECK-NEXT: vmov r2, s13 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vminnm.f32 s14, s14, s7 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vcmp.f32 s9, s9 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: bfi r1, r2, #2, #1 -; CHECK-NEXT: vmov r2, s11 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s14, s14 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vminnm.f32 s10, s10, s7 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vcmp.f32 s2, s2 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: vmov r2, s3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s10, s10 -; CHECK-NEXT: and r2, r2, #1 -; 
CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vminnm.f32 s6, s6, s7 -; CHECK-NEXT: bfi r1, r2, #4, #1 -; CHECK-NEXT: vcmp.f32 s12, s12 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s6, s6 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vcmp.f32 s8, s8 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: bfi r1, r2, #5, #1 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcmp.f32 s4, s4 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #6, #1 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #7, #1 -; CHECK-NEXT: strb r1, [r0] -; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI42_0: -; CHECK-NEXT: .long 0x00000000 @ float 0 +; CHECK-MVE-LABEL: test_signed_v8f16_v8i1: +; CHECK-MVE: @ %bb.0: +; CHECK-MVE-NEXT: .vsave {d8} +; CHECK-MVE-NEXT: vpush {d8} +; CHECK-MVE-NEXT: vcvtb.f32.f16 s15, s0 +; CHECK-MVE-NEXT: vmov.f32 s5, #-1.000000e+00 +; CHECK-MVE-NEXT: vldr s7, .LCPI42_0 +; CHECK-MVE-NEXT: vmaxnm.f32 s16, s15, s5 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s2 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s9, s1 +; CHECK-MVE-NEXT: vminnm.f32 s16, s16, s7 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s4, s3 +; CHECK-MVE-NEXT: vcvt.s32.f32 s16, s16 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s8, s3 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s0, s0 +; CHECK-MVE-NEXT: vmaxnm.f32 s6, s4, s5 +; CHECK-MVE-NEXT: vmaxnm.f32 s10, s8, s5 +; CHECK-MVE-NEXT: vmaxnm.f32 s14, s12, s5 +; CHECK-MVE-NEXT: vmaxnm.f32 s3, s2, s5 +; CHECK-MVE-NEXT: vmaxnm.f32 s11, s9, s5 +; CHECK-MVE-NEXT: vmaxnm.f32 s13, s1, s5 +; CHECK-MVE-NEXT: vmaxnm.f32 s5, s0, s5 +; CHECK-MVE-NEXT: vminnm.f32 s5, s5, s7 +; CHECK-MVE-NEXT: vminnm.f32 s13, s13, s7 +; CHECK-MVE-NEXT: vcvt.s32.f32 s5, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s15, s15 +; CHECK-MVE-NEXT: vminnm.f32 s11, s11, s7 +; CHECK-MVE-NEXT: vmov r2, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vcvt.s32.f32 s13, s13 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: vcmp.f32 s0, s0 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: bfi r1, r2, #0, #1 +; CHECK-MVE-NEXT: vcvt.s32.f32 s11, s11 +; CHECK-MVE-NEXT: vmov r2, s5 +; CHECK-MVE-NEXT: vminnm.f32 s3, s3, s7 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: rsb.w r2, r2, #0 +; CHECK-MVE-NEXT: vcvt.s32.f32 s3, s3 +; CHECK-MVE-NEXT: bfi r1, r2, #1, #1 +; CHECK-MVE-NEXT: vmov r2, s13 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vminnm.f32 s14, s14, s7 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: vcmp.f32 s9, s9 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: bfi r1, r2, #2, #1 +; CHECK-MVE-NEXT: vmov r2, s11 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vcvt.s32.f32 s14, s14 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s7 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; 
CHECK-MVE-NEXT: vcmp.f32 s2, s2 +; CHECK-MVE-NEXT: bfi r1, r2, #3, #1 +; CHECK-MVE-NEXT: vmov r2, s3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s10 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; CHECK-MVE-NEXT: vminnm.f32 s6, s6, s7 +; CHECK-MVE-NEXT: bfi r1, r2, #4, #1 +; CHECK-MVE-NEXT: vcmp.f32 s12, s12 +; CHECK-MVE-NEXT: vmov r2, s14 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s6 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: vcmp.f32 s8, s8 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: bfi r1, r2, #5, #1 +; CHECK-MVE-NEXT: vmov r2, s10 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s4, s4 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: rsb.w r2, r2, #0 +; CHECK-MVE-NEXT: bfi r1, r2, #6, #1 +; CHECK-MVE-NEXT: vmov r2, s6 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; CHECK-MVE-NEXT: bfi r1, r2, #7, #1 +; CHECK-MVE-NEXT: strb r1, [r0] +; CHECK-MVE-NEXT: vpop {d8} +; CHECK-MVE-NEXT: bx lr +; CHECK-MVE-NEXT: .p2align 2 +; CHECK-MVE-NEXT: @ %bb.1: +; CHECK-MVE-NEXT: .LCPI42_0: +; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 +; +; CHECK-MVEFP-LABEL: test_signed_v8f16_v8i1: +; CHECK-MVEFP: @ %bb.0: +; CHECK-MVEFP-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-MVEFP-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-MVEFP-NEXT: vmov.i16 q2, #0xbc00 +; CHECK-MVEFP-NEXT: vmov.i32 q1, #0x0 +; CHECK-MVEFP-NEXT: vmaxnm.f16 q2, q0, q2 +; CHECK-MVEFP-NEXT: vminnm.f16 q1, q2, q1 +; CHECK-MVEFP-NEXT: vcmp.f16 ne, q1, zr +; CHECK-MVEFP-NEXT: vmovx.f16 s4, s3 +; CHECK-MVEFP-NEXT: vmrs r2, p0 +; CHECK-MVEFP-NEXT: vcmp.f16 s4, s4 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: vcmp.f16 s3, s3 +; CHECK-MVEFP-NEXT: vmovx.f16 s4, s2 +; CHECK-MVEFP-NEXT: lsr.w r12, r2, #14 +; CHECK-MVEFP-NEXT: lsr.w lr, r2, #12 +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs.w r12, #0 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: vcmp.f16 s4, s4 +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs.w lr, #0 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: lsr.w r3, r2, #10 +; CHECK-MVEFP-NEXT: vcmp.f16 s2, s2 +; CHECK-MVEFP-NEXT: vmovx.f16 s2, s1 +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs r3, #0 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: lsr.w r1, r2, #8 +; CHECK-MVEFP-NEXT: vcmp.f16 s2, s2 +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs r1, #0 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: lsr.w r4, r2, #6 +; CHECK-MVEFP-NEXT: vcmp.f16 s1, s1 +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs r4, #0 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: lsr.w r5, r2, #4 +; CHECK-MVEFP-NEXT: vcmp.f16 s0, s0 +; CHECK-MVEFP-NEXT: vmovx.f16 s0, s0 +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs r5, #0 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: lsr.w r6, r2, #2 +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs r2, #0 +; CHECK-MVEFP-NEXT: vcmp.f16 s0, s0 +; CHECK-MVEFP-NEXT: and r2, r2, #1 +; CHECK-MVEFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVEFP-NEXT: it vs +; CHECK-MVEFP-NEXT: movvs r6, #0 +; CHECK-MVEFP-NEXT: rsbs r7, r2, #0 +; CHECK-MVEFP-NEXT: movs r2, #0 +; 
CHECK-MVEFP-NEXT: and r6, r6, #1 +; CHECK-MVEFP-NEXT: and r5, r5, #1 +; CHECK-MVEFP-NEXT: bfi r2, r7, #0, #1 +; CHECK-MVEFP-NEXT: rsbs r6, r6, #0 +; CHECK-MVEFP-NEXT: and r4, r4, #1 +; CHECK-MVEFP-NEXT: bfi r2, r6, #1, #1 +; CHECK-MVEFP-NEXT: rsbs r5, r5, #0 +; CHECK-MVEFP-NEXT: and r1, r1, #1 +; CHECK-MVEFP-NEXT: bfi r2, r5, #2, #1 +; CHECK-MVEFP-NEXT: rsbs r4, r4, #0 +; CHECK-MVEFP-NEXT: rsbs r1, r1, #0 +; CHECK-MVEFP-NEXT: bfi r2, r4, #3, #1 +; CHECK-MVEFP-NEXT: bfi r2, r1, #4, #1 +; CHECK-MVEFP-NEXT: and r1, r3, #1 +; CHECK-MVEFP-NEXT: rsbs r1, r1, #0 +; CHECK-MVEFP-NEXT: bfi r2, r1, #5, #1 +; CHECK-MVEFP-NEXT: and r1, lr, #1 +; CHECK-MVEFP-NEXT: rsbs r1, r1, #0 +; CHECK-MVEFP-NEXT: bfi r2, r1, #6, #1 +; CHECK-MVEFP-NEXT: and r1, r12, #1 +; CHECK-MVEFP-NEXT: rsbs r1, r1, #0 +; CHECK-MVEFP-NEXT: bfi r2, r1, #7, #1 +; CHECK-MVEFP-NEXT: strb r2, [r0] +; CHECK-MVEFP-NEXT: pop {r4, r5, r6, r7, pc} %x = call <8 x i1> @llvm.fptosi.sat.v8f16.v8i1(<8 x half> %f) ret <8 x i1> %x } Index: llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -1285,45 +1285,69 @@ declare <4 x i128> @llvm.fptoui.sat.v4f32.v4i128(<4 x float>) define arm_aapcs_vfpcc <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) { -; CHECK-LABEL: test_unsigned_v4f32_v4i1: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s4, .LCPI22_0 -; CHECK-NEXT: vmov.f32 s6, #1.000000e+00 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vmaxnm.f32 s0, s0, s4 -; CHECK-NEXT: vmaxnm.f32 s8, s3, s4 -; CHECK-NEXT: vminnm.f32 s0, s0, s6 -; CHECK-NEXT: vmaxnm.f32 s2, s2, s4 -; CHECK-NEXT: vcvt.u32.f32 s0, s0 -; CHECK-NEXT: vmaxnm.f32 s4, s1, s4 -; CHECK-NEXT: vminnm.f32 s4, s4, s6 -; CHECK-NEXT: vminnm.f32 s2, s2, s6 -; CHECK-NEXT: vcvt.u32.f32 s4, s4 -; CHECK-NEXT: vminnm.f32 s8, s8, s6 -; CHECK-NEXT: vcvt.u32.f32 s2, s2 -; CHECK-NEXT: vcvt.u32.f32 s8, s8 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #0, #1 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #1, #1 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #2, #1 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: strb r1, [r0] -; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI22_0: -; CHECK-NEXT: .long 0x00000000 @ float 0 +; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i1: +; CHECK-MVE: @ %bb.0: +; CHECK-MVE-NEXT: vldr s4, .LCPI22_0 +; CHECK-MVE-NEXT: vmov.f32 s6, #1.000000e+00 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4 +; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4 +; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0 +; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4 +; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6 +; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6 +; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4 +; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6 +; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2 +; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8 +; CHECK-MVE-NEXT: vmov r2, s0 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; CHECK-MVE-NEXT: bfi r1, r2, #0, #1 +; CHECK-MVE-NEXT: vmov r2, s4 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; 
CHECK-MVE-NEXT: bfi r1, r2, #1, #1 +; CHECK-MVE-NEXT: vmov r2, s2 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; CHECK-MVE-NEXT: bfi r1, r2, #2, #1 +; CHECK-MVE-NEXT: vmov r2, s8 +; CHECK-MVE-NEXT: and r2, r2, #1 +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; CHECK-MVE-NEXT: bfi r1, r2, #3, #1 +; CHECK-MVE-NEXT: strb r1, [r0] +; CHECK-MVE-NEXT: bx lr +; CHECK-MVE-NEXT: .p2align 2 +; CHECK-MVE-NEXT: @ %bb.1: +; CHECK-MVE-NEXT: .LCPI22_0: +; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 +; +; CHECK-MVEFP-LABEL: test_unsigned_v4f32_v4i1: +; CHECK-MVEFP: @ %bb.0: +; CHECK-MVEFP-NEXT: vmov.i32 q1, #0x0 +; CHECK-MVEFP-NEXT: vmov.f32 q2, #1.000000e+00 +; CHECK-MVEFP-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-MVEFP-NEXT: movs r3, #0 +; CHECK-MVEFP-NEXT: vminnm.f32 q0, q0, q2 +; CHECK-MVEFP-NEXT: vcmp.f32 ne, q0, zr +; CHECK-MVEFP-NEXT: vmrs r1, p0 +; CHECK-MVEFP-NEXT: and r2, r1, #1 +; CHECK-MVEFP-NEXT: rsbs r2, r2, #0 +; CHECK-MVEFP-NEXT: bfi r3, r2, #0, #1 +; CHECK-MVEFP-NEXT: ubfx r2, r1, #4, #1 +; CHECK-MVEFP-NEXT: rsbs r2, r2, #0 +; CHECK-MVEFP-NEXT: bfi r3, r2, #1, #1 +; CHECK-MVEFP-NEXT: ubfx r2, r1, #8, #1 +; CHECK-MVEFP-NEXT: ubfx r1, r1, #12, #1 +; CHECK-MVEFP-NEXT: rsbs r2, r2, #0 +; CHECK-MVEFP-NEXT: bfi r3, r2, #2, #1 +; CHECK-MVEFP-NEXT: rsbs r1, r1, #0 +; CHECK-MVEFP-NEXT: bfi r3, r1, #3, #1 +; CHECK-MVEFP-NEXT: strb r3, [r0] +; CHECK-MVEFP-NEXT: bx lr %x = call <4 x i1> @llvm.fptoui.sat.v4f32.v4i1(<4 x float> %f) ret <4 x i1> %x } @@ -3502,81 +3526,117 @@ declare <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half>) define arm_aapcs_vfpcc <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) { -; CHECK-LABEL: test_unsigned_v8f16_v8i1: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr s4, .LCPI42_0 -; CHECK-NEXT: vcvtt.f32.f16 s8, s3 -; CHECK-NEXT: vcvtb.f32.f16 s10, s3 -; CHECK-NEXT: vcvtb.f32.f16 s3, s0 -; CHECK-NEXT: vmov.f32 s6, #1.000000e+00 -; CHECK-NEXT: vmaxnm.f32 s3, s3, s4 -; CHECK-NEXT: vminnm.f32 s3, s3, s6 -; CHECK-NEXT: vcvtt.f32.f16 s0, s0 -; CHECK-NEXT: vcvt.u32.f32 s3, s3 -; CHECK-NEXT: vmaxnm.f32 s0, s0, s4 -; CHECK-NEXT: vminnm.f32 s0, s0, s6 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vcvt.u32.f32 s0, s0 -; CHECK-NEXT: vcvtt.f32.f16 s14, s1 -; CHECK-NEXT: vcvtb.f32.f16 s1, s1 -; CHECK-NEXT: vmaxnm.f32 s14, s14, s4 -; CHECK-NEXT: vmaxnm.f32 s1, s1, s4 -; CHECK-NEXT: vminnm.f32 s14, s14, s6 -; CHECK-NEXT: vminnm.f32 s1, s1, s6 -; CHECK-NEXT: vcvt.u32.f32 s14, s14 -; CHECK-NEXT: vcvt.u32.f32 s1, s1 -; CHECK-NEXT: vcvtt.f32.f16 s12, s2 -; CHECK-NEXT: vmov r2, s3 -; CHECK-NEXT: vcvtb.f32.f16 s2, s2 -; CHECK-NEXT: vmaxnm.f32 s2, s2, s4 -; CHECK-NEXT: vmaxnm.f32 s12, s12, s4 -; CHECK-NEXT: vminnm.f32 s2, s2, s6 -; CHECK-NEXT: vminnm.f32 s12, s12, s6 -; CHECK-NEXT: vcvt.u32.f32 s2, s2 -; CHECK-NEXT: vmaxnm.f32 s10, s10, s4 -; CHECK-NEXT: vcvt.u32.f32 s12, s12 -; CHECK-NEXT: vminnm.f32 s10, s10, s6 -; CHECK-NEXT: vcvt.u32.f32 s10, s10 -; CHECK-NEXT: vmaxnm.f32 s8, s8, s4 -; CHECK-NEXT: vminnm.f32 s8, s8, s6 -; CHECK-NEXT: vcvt.u32.f32 s8, s8 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #0, #1 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #1, #1 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #2, #1 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 
-; CHECK-NEXT: bfi r1, r2, #4, #1
-; CHECK-NEXT: vmov r2, s12
-; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r1, r2, #5, #1
-; CHECK-NEXT: vmov r2, s10
-; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r1, r2, #6, #1
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r1, r2, #7, #1
-; CHECK-NEXT: strb r1, [r0]
-; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI42_0:
-; CHECK-NEXT: .long 0x00000000 @ float 0
+; CHECK-MVE-LABEL: test_unsigned_v8f16_v8i1:
+; CHECK-MVE: @ %bb.0:
+; CHECK-MVE-NEXT: vldr s4, .LCPI42_0
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s8, s3
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s10, s3
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s3, s0
+; CHECK-MVE-NEXT: vmov.f32 s6, #1.000000e+00
+; CHECK-MVE-NEXT: vmaxnm.f32 s3, s3, s4
+; CHECK-MVE-NEXT: vminnm.f32 s3, s3, s6
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s0, s0
+; CHECK-MVE-NEXT: vcvt.u32.f32 s3, s3
+; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s14, s1
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-MVE-NEXT: vmaxnm.f32 s14, s14, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s1, s1, s4
+; CHECK-MVE-NEXT: vminnm.f32 s14, s14, s6
+; CHECK-MVE-NEXT: vminnm.f32 s1, s1, s6
+; CHECK-MVE-NEXT: vcvt.u32.f32 s14, s14
+; CHECK-MVE-NEXT: vcvt.u32.f32 s1, s1
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s2
+; CHECK-MVE-NEXT: vmov r2, s3
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s12, s12, s4
+; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6
+; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s6
+; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s10, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s12, s12
+; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6
+; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s10
+; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s4
+; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6
+; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
+; CHECK-MVE-NEXT: and r2, r2, #1
+; CHECK-MVE-NEXT: rsbs r2, r2, #0
+; CHECK-MVE-NEXT: bfi r1, r2, #0, #1
+; CHECK-MVE-NEXT: vmov r2, s0
+; CHECK-MVE-NEXT: and r2, r2, #1
+; CHECK-MVE-NEXT: rsbs r2, r2, #0
+; CHECK-MVE-NEXT: bfi r1, r2, #1, #1
+; CHECK-MVE-NEXT: vmov r2, s1
+; CHECK-MVE-NEXT: and r2, r2, #1
+; CHECK-MVE-NEXT: rsbs r2, r2, #0
+; CHECK-MVE-NEXT: bfi r1, r2, #2, #1
+; CHECK-MVE-NEXT: vmov r2, s14
+; CHECK-MVE-NEXT: and r2, r2, #1
+; CHECK-MVE-NEXT: rsbs r2, r2, #0
+; CHECK-MVE-NEXT: bfi r1, r2, #3, #1
+; CHECK-MVE-NEXT: vmov r2, s2
+; CHECK-MVE-NEXT: and r2, r2, #1
+; CHECK-MVE-NEXT: rsbs r2, r2, #0
+; CHECK-MVE-NEXT: bfi r1, r2, #4, #1
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: and r2, r2, #1
+; CHECK-MVE-NEXT: rsbs r2, r2, #0
+; CHECK-MVE-NEXT: bfi r1, r2, #5, #1
+; CHECK-MVE-NEXT: vmov r2, s10
+; CHECK-MVE-NEXT: and r2, r2, #1
+; CHECK-MVE-NEXT: rsbs r2, r2, #0
+; CHECK-MVE-NEXT: bfi r1, r2, #6, #1
+; CHECK-MVE-NEXT: vmov r2, s8
+; CHECK-MVE-NEXT: and r2, r2, #1
+; CHECK-MVE-NEXT: rsbs r2, r2, #0
+; CHECK-MVE-NEXT: bfi r1, r2, #7, #1
+; CHECK-MVE-NEXT: strb r1, [r0]
+; CHECK-MVE-NEXT: bx lr
+; CHECK-MVE-NEXT: .p2align 2
+; CHECK-MVE-NEXT: @ %bb.1:
+; CHECK-MVE-NEXT: .LCPI42_0:
+; CHECK-MVE-NEXT: .long 0x00000000 @ float 0
+;
+; CHECK-MVEFP-LABEL: test_unsigned_v8f16_v8i1:
+; CHECK-MVEFP: @ %bb.0:
+; CHECK-MVEFP-NEXT: vmov.i32 q1, #0x0
+; CHECK-MVEFP-NEXT: vmov.i16 q2, #0x3c00
+; CHECK-MVEFP-NEXT: vmaxnm.f16 q0, q0, q1
+; CHECK-MVEFP-NEXT: vminnm.f16 q0, q0, q2
+; CHECK-MVEFP-NEXT: vcmp.f16 ne, q0, zr
+; CHECK-MVEFP-NEXT: vmrs r2, p0
+; CHECK-MVEFP-NEXT: and r1, r2, #1
+; CHECK-MVEFP-NEXT: rsbs r3, r1, #0
+; CHECK-MVEFP-NEXT: movs r1, #0
+; CHECK-MVEFP-NEXT: bfi r1, r3, #0, #1
+; CHECK-MVEFP-NEXT: ubfx r3, r2, #2, #1
+; CHECK-MVEFP-NEXT: rsbs r3, r3, #0
+; CHECK-MVEFP-NEXT: bfi r1, r3, #1, #1
+; CHECK-MVEFP-NEXT: ubfx r3, r2, #4, #1
+; CHECK-MVEFP-NEXT: rsbs r3, r3, #0
+; CHECK-MVEFP-NEXT: bfi r1, r3, #2, #1
+; CHECK-MVEFP-NEXT: ubfx r3, r2, #6, #1
+; CHECK-MVEFP-NEXT: rsbs r3, r3, #0
+; CHECK-MVEFP-NEXT: bfi r1, r3, #3, #1
+; CHECK-MVEFP-NEXT: ubfx r3, r2, #8, #1
+; CHECK-MVEFP-NEXT: rsbs r3, r3, #0
+; CHECK-MVEFP-NEXT: bfi r1, r3, #4, #1
+; CHECK-MVEFP-NEXT: ubfx r3, r2, #10, #1
+; CHECK-MVEFP-NEXT: rsbs r3, r3, #0
+; CHECK-MVEFP-NEXT: bfi r1, r3, #5, #1
+; CHECK-MVEFP-NEXT: ubfx r3, r2, #12, #1
+; CHECK-MVEFP-NEXT: ubfx r2, r2, #14, #1
+; CHECK-MVEFP-NEXT: rsbs r3, r3, #0
+; CHECK-MVEFP-NEXT: bfi r1, r3, #6, #1
+; CHECK-MVEFP-NEXT: rsbs r2, r2, #0
+; CHECK-MVEFP-NEXT: bfi r1, r2, #7, #1
+; CHECK-MVEFP-NEXT: strb r1, [r0]
+; CHECK-MVEFP-NEXT: bx lr
 %x = call <8 x i1> @llvm.fptoui.sat.v8f16.v8i1(<8 x half> %f)
 ret <8 x i1> %x
 }