diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -737,6 +737,20 @@
   case ISD::SELECT:
     Results.push_back(ExpandSELECT(Node));
     return;
+  case ISD::SELECT_CC: {
+    if (Node->getValueType(0).isScalableVector()) {
+      EVT CondVT = TLI.getSetCCResultType(
+          DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+      SDValue SetCC =
+          DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
+                      Node->getOperand(1), Node->getOperand(4));
+      Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
+                                      Node->getOperand(2),
+                                      Node->getOperand(3)));
+      return;
+    }
+    break;
+  }
   case ISD::FP_TO_UINT:
     ExpandFP_TO_UINT(Node, Results);
     return;
@@ -833,6 +847,16 @@
       return;
     }
     break;
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT:
+    // Expand the FP_TO_*INT_SAT if it is scalable to prevent it from unrolling below.
+    if (Node->getValueType(0).isScalableVector()) {
+      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
+        Results.push_back(Expanded);
+        return;
+      }
+    }
+    break;
   case ISD::SMULFIX:
   case ISD::UMULFIX:
     if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6111,8 +6111,8 @@
     assert(N1.getValueType().isVector() == VT.isVector() &&
           "FP_TO_*INT_SAT type should be vector iff the operand type is "
           "vector!");
-    assert((!VT.isVector() || VT.getVectorNumElements() ==
-                                  N1.getValueType().getVectorNumElements()) &&
+    assert((!VT.isVector() || VT.getVectorElementCount() ==
+                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element counts must match in FP_TO_*INT_SAT");
    assert(!cast<VTSDNode>(N2)->getVT().isVector() &&
           "Type to saturate to must be a scalar.");
@@ -8941,6 +8941,11 @@
           "True and False arms of SelectCC must have same type!");
    assert(Ops[2].getValueType() == VT &&
           "select_cc node must be of same type as true and false value!");
+   assert((!Ops[0].getValueType().isVector() ||
+           Ops[0].getValueType().getVectorElementCount() ==
+               VT.getVectorElementCount()) &&
+          "Expected select_cc with vector result to have the same sized "
+          "comparison type!");
    break;
  case ISD::BR_CC:
    assert(NumOps == 5 && "BR_CC takes 5 operands!");
diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -0,0 +1,720 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
+
+; Float
+
+declare @llvm.fptosi.sat.nxv2f32.nxv2i32()
+declare @llvm.fptosi.sat.nxv4f32.nxv4i32()
+declare @llvm.fptosi.sat.nxv8f32.nxv8i32()
+declare @llvm.fptosi.sat.nxv4f32.nxv4i16()
+declare @llvm.fptosi.sat.nxv8f32.nxv8i16()
+declare @llvm.fptosi.sat.nxv2f32.nxv2i64()
+declare @llvm.fptosi.sat.nxv4f32.nxv4i64()
+
+define @test_signed_v2f32_v2i32( %f) {
+; CHECK-LABEL: test_signed_v2f32_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-822083584
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #0xffffffff80000000
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov w8, #1325400063
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    movprfx z1, z0
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z0.s
+; CHECK-NEXT:    not
p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.d, #0x7fffffff +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f32.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f32_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-822083584 +; CHECK-NEXT: mov w9, #-2147483648 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov w8, #1325400063 +; CHECK-NEXT: mov z2.s, w9 +; CHECK-NEXT: mov w9, #2147483647 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z3.s +; CHECK-NEXT: mov z2.s, w9 +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f32.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f32_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f32_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-822083584 +; CHECK-NEXT: mov w9, #-2147483648 +; CHECK-NEXT: mov w10, #1325400063 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: fcvtzs z5.s, p0/m, z0.s +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z2.s +; CHECK-NEXT: mov z3.s, w9 +; CHECK-NEXT: mov z4.s, w10 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z5.s, p1/m, z3.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z4.s +; CHECK-NEXT: mov z6.s, w8 +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.s, p0/m, z1.s +; CHECK-NEXT: sel z3.s, p2, z3.s, z2.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z4.s +; CHECK-NEXT: sel z2.s, p1, z6.s, z5.s +; CHECK-NEXT: mov z3.s, p2/m, z6.s +; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z0.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s +; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z3.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f32.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f32_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-956301312 +; CHECK-NEXT: mov w9, #65024 +; CHECK-NEXT: movk w9, #18175, lsl #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.s, w9 +; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f32.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f32_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f32_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-956301312 +; CHECK-NEXT: mov w9, #65024 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movk w9, #18175, lsl #16 +; 
CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: fcvtzs z4.s, p0/m, z1.s +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: mov z3.s, w9 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z3.s +; CHECK-NEXT: mov z4.s, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzs z2.s, p0/m, z0.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z5.s, w8 +; CHECK-NEXT: mov z2.s, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z3.s +; CHECK-NEXT: sel z3.s, p2, z5.s, z4.s +; CHECK-NEXT: mov z2.s, p1/m, z5.s +; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0 +; CHECK-NEXT: uzp1 z0.h, z2.h, z3.h +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f32.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f32_v2i64( %f) { +; CHECK-LABEL: test_signed_v2f32_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-553648128 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov w8, #1593835519 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f32.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f32_v4i64( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-553648128 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: mov w9, #1593835519 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: uunpkhi z5.d, z0.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: movprfx z0, z3 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.s +; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z1.s +; CHECK-NEXT: mov z4.s, w9 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p2.s, p0/z, z3.s, z4.s +; CHECK-NEXT: mov z0.d, p1/m, z2.d +; CHECK-NEXT: fcmge p1.s, p0/z, z5.s, z1.s +; CHECK-NEXT: movprfx z1, z5 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: fcmgt p1.s, p0/z, z5.s, z4.s +; CHECK-NEXT: mov z0.d, p2/m, z6.d +; CHECK-NEXT: mov z1.d, p1/m, z6.d +; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z3.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z5.s, z5.s +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f32.nxv4i64( %f) + ret %x +} + +; Double + +declare @llvm.fptosi.sat.nxv2f64.nxv2i32() +declare @llvm.fptosi.sat.nxv4f64.nxv4i32() +declare @llvm.fptosi.sat.nxv8f64.nxv8i32() +declare @llvm.fptosi.sat.nxv4f64.nxv4i16() +declare @llvm.fptosi.sat.nxv8f64.nxv8i16() +declare @llvm.fptosi.sat.nxv2f64.nxv2i64() +declare @llvm.fptosi.sat.nxv4f64.nxv4i64() + +define @test_signed_v2f64_v2i32( %f) { +; CHECK-LABEL: test_signed_v2f64_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-4476578029606273024 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #0xffffffff80000000 +; CHECK-NEXT: mov 
z1.d, x8 +; CHECK-NEXT: mov x8, #281474972516352 +; CHECK-NEXT: movk x8, #16863, lsl #48 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z2.d, #0x7fffffff +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f64.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f64_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f64_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-4476578029606273024 +; CHECK-NEXT: mov x9, #281474972516352 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movk x9, #16863, lsl #48 +; CHECK-NEXT: mov z3.d, #0xffffffff80000000 +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.d +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov z6.d, #0x7fffffff +; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d +; CHECK-NEXT: mov z5.d, x9 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z5.d +; CHECK-NEXT: mov z4.d, p1/m, z3.d +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z0.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.d, p1/m, z3.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z5.d +; CHECK-NEXT: sel z3.d, p2, z6.d, z4.d +; CHECK-NEXT: mov z2.d, p1/m, z6.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0 +; CHECK-NEXT: uzp1 z0.s, z2.s, z3.s +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f64.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f64_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f64_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-4476578029606273024 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z5.d, #0xffffffff80000000 +; CHECK-NEXT: movprfx z6, z1 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z1.d +; CHECK-NEXT: mov z24.d, #0x7fffffff +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: mov x8, #281474972516352 +; CHECK-NEXT: movk x8, #16863, lsl #48 +; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z4.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d +; CHECK-NEXT: mov z6.d, p1/m, z5.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z7.d, x8 +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z7.d +; CHECK-NEXT: mov z6.d, p1/m, z24.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d +; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p2/m, z5.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z7.d +; CHECK-NEXT: mov z1.d, p2/m, z24.d +; CHECK-NEXT: fcmge p2.d, p0/z, z3.d, z4.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d +; CHECK-NEXT: movprfx z0, z3 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p2/m, z5.d +; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d +; CHECK-NEXT: movprfx z4, z2 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z7.d +; CHECK-NEXT: mov z4.d, p2/m, z5.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z7.d +; CHECK-NEXT: sel z5.d, p1, z24.d, z0.d +; CHECK-NEXT: mov z4.d, p2/m, z24.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z2.d, z2.d 
+; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0 +; CHECK-NEXT: uzp1 z0.s, z1.s, z6.s +; CHECK-NEXT: uzp1 z1.s, z4.s, z5.s +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f64.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f64_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f64_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-4548635623644200960 +; CHECK-NEXT: mov x9, #281200098803712 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movk x9, #16607, lsl #48 +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.d +; CHECK-NEXT: mov z3.d, #32767 // =0x7fff +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d +; CHECK-NEXT: mov z5.d, x9 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z5.d +; CHECK-NEXT: mov z4.d, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z0.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.d, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z5.d +; CHECK-NEXT: mov z4.d, p2/m, z3.d +; CHECK-NEXT: mov z2.d, p1/m, z3.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z4.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0 +; CHECK-NEXT: uzp1 z0.s, z2.s, z4.s +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f64.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f64_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f64_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-4548635623644200960 +; CHECK-NEXT: mov x9, #281200098803712 +; CHECK-NEXT: movk x9, #16607, lsl #48 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z5, z3 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z3.d +; CHECK-NEXT: mov z7.d, #32767 // =0x7fff +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, z4.d +; CHECK-NEXT: mov z6.d, x9 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p2.d, p0/z, z3.d, z6.d +; CHECK-NEXT: mov z5.d, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d +; CHECK-NEXT: mov z5.d, p2/m, z7.d +; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d +; CHECK-NEXT: movprfx z3, z2 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z2.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z3.d, p2/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z6.d +; CHECK-NEXT: mov z3.d, p2/m, z7.d +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z4.d +; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p1.d, p0/z, z2.d, z2.d +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.d, p2/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z6.d +; CHECK-NEXT: mov z4.d, p2/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z6.d +; CHECK-NEXT: mov z2.d, p1/m, z7.d +; CHECK-NEXT: mov z4.d, p2/m, z7.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0 +; CHECK-NEXT: uzp1 z0.s, z3.s, z5.s +; CHECK-NEXT: uzp1 z1.s, z4.s, z2.s +; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f64.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f64_v2i64( 
%f) { +; CHECK-LABEL: test_signed_v2f64_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-4332462841530417152 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f64.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f64_v4i64( %f) { +; CHECK-LABEL: test_signed_v4f64_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-4332462841530417152 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x9, #4890909195324358655 +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z5.d, x9 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z5.d +; CHECK-NEXT: mov z4.d, p1/m, z3.d +; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d +; CHECK-NEXT: sel z3.d, p1, z3.d, z2.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z5.d +; CHECK-NEXT: sel z2.d, p2, z6.d, z4.d +; CHECK-NEXT: mov z3.d, p1/m, z6.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z1.d, z1.d +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f64.nxv4i64( %f) + ret %x +} + + +; half + +declare @llvm.fptosi.sat.nxv2f16.nxv2i32() +declare @llvm.fptosi.sat.nxv4f16.nxv4i32() +declare @llvm.fptosi.sat.nxv8f16.nxv8i32() +declare @llvm.fptosi.sat.nxv4f16.nxv4i16() +declare @llvm.fptosi.sat.nxv8f16.nxv8i16() +declare @llvm.fptosi.sat.nxv2f16.nxv2i64() +declare @llvm.fptosi.sat.nxv4f16.nxv4i64() + +define @test_signed_v2f16_v2i32( %f) { +; CHECK-LABEL: test_signed_v2f16_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI14_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z3.d, #0xffffffff80000000 +; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] +; CHECK-NEXT: adrp x8, .LCPI14_1 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_1 +; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, p1/m, z3.d +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.d, #0x7fffffff +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f16.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI15_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI15_0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: adrp x9, .LCPI15_1 +; CHECK-NEXT: add x9, x9, :lo12:.LCPI15_1 +; CHECK-NEXT: ld1rh { z1.s }, 
p0/z, [x8] +; CHECK-NEXT: mov w8, #-2147483648 +; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x9] +; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.s, p1/m, z3.s +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f16.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f16_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f16_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI16_0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: adrp x9, .LCPI16_1 +; CHECK-NEXT: add x9, x9, :lo12:.LCPI16_1 +; CHECK-NEXT: uunpklo z2.s, z0.h +; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] +; CHECK-NEXT: mov w8, #-2147483648 +; CHECK-NEXT: uunpkhi z6.s, z0.h +; CHECK-NEXT: ld1rh { z3.s }, p0/z, [x9] +; CHECK-NEXT: movprfx z4, z2 +; CHECK-NEXT: fcvtzs z4.s, p0/m, z2.h +; CHECK-NEXT: mov z5.s, w8 +; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmge p2.h, p0/z, z6.h, z1.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z4.s, p1/m, z5.s +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z3.h +; CHECK-NEXT: mov z7.s, w8 +; CHECK-NEXT: movprfx z0, z6 +; CHECK-NEXT: fcvtzs z0.s, p0/m, z6.h +; CHECK-NEXT: sel z1.s, p2, z5.s, z0.s +; CHECK-NEXT: fcmgt p2.h, p0/z, z6.h, z3.h +; CHECK-NEXT: sel z0.s, p1, z7.s, z4.s +; CHECK-NEXT: mov z1.s, p2/m, z7.s +; CHECK-NEXT: fcmuo p1.h, p0/z, z2.h, z2.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z6.h, z6.h +; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f16.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] +; CHECK-NEXT: adrp x8, .LCPI17_1 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_1 +; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8] +; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f16.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f16_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f16_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI18_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rh { z1.h }, p0/z, [x8] +; CHECK-NEXT: adrp x8, .LCPI18_1 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_1 +; CHECK-NEXT: ld1rh { z2.h }, p0/z, [x8] +; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.h, p0/m, z0.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.h, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, 
z2.h +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z1.h, p1/m, z2.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.h, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f16.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f16_v2i64( %f) { +; CHECK-LABEL: test_signed_v2f16_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI19_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] +; CHECK-NEXT: adrp x8, .LCPI19_1 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_1 +; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, p1/m, z3.d +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f16.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f16_v4i64( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI20_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z4.d, z0.s +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: uunpkhi z5.d, z0.s +; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] +; CHECK-NEXT: adrp x8, .LCPI20_1 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_1 +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] +; CHECK-NEXT: fcmge p1.h, p0/z, z4.h, z1.h +; CHECK-NEXT: movprfx z0, z4 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z0.d, p1/m, z3.d +; CHECK-NEXT: fcmge p1.h, p0/z, z5.h, z1.h +; CHECK-NEXT: movprfx z1, z5 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p2.h, p0/z, z4.h, z2.h +; CHECK-NEXT: mov z1.d, p1/m, z3.d +; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z2.h +; CHECK-NEXT: mov z0.d, p2/m, z6.d +; CHECK-NEXT: mov z1.d, p1/m, z6.d +; CHECK-NEXT: fcmuo p1.h, p0/z, z4.h, z4.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f16.nxv4i64( %f) + ret %x +} + diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll @@ -0,0 +1,556 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s + +; Float + +declare @llvm.fptoui.sat.nxv2f32.nxv2i32() +declare @llvm.fptoui.sat.nxv4f32.nxv4i32() +declare @llvm.fptoui.sat.nxv8f32.nxv8i32() +declare @llvm.fptoui.sat.nxv4f32.nxv4i16() +declare @llvm.fptoui.sat.nxv8f32.nxv8i16() +declare @llvm.fptoui.sat.nxv2f32.nxv2i64() +declare @llvm.fptoui.sat.nxv4f32.nxv4i64() + +define @test_signed_v2f32_v2i32( %f) { +; CHECK-LABEL: test_signed_v2f32_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1333788671 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0xffffffff 
+; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p2/m, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f32.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f32_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1333788671 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f32.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f32_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f32_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1333788671 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: movprfx z3, z1 +; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z4.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z4.s +; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z2.s, p1/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z3.s, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f32.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f32_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65280 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movk w8, #18303, lsl #16 +; CHECK-NEXT: mov w9, #65535 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: mov z1.s, w9 +; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.s, p2/m, z1.s +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f32.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f32_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f32_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65280 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movk w8, #18303, lsl #16 +; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: movprfx z3, z1 +; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z2.s +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z2.s, p2, z0.s, z3.s +; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f32.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f32_v2i64( %f) { +; CHECK-LABEL: test_signed_v2f32_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1602224127 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; 
CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f32.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f32_v4i64( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1602224127 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpkhi z3.d, z0.s +; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmge p2.s, p0/z, z3.s, #0.0 +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: movprfx z1, z3 +; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcmgt p0.s, p0/z, z3.s, z2.s +; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f32.nxv4i64( %f) + ret %x +} + +; Double + +declare @llvm.fptoui.sat.nxv2f64.nxv2i32() +declare @llvm.fptoui.sat.nxv4f64.nxv4i32() +declare @llvm.fptoui.sat.nxv8f64.nxv8i32() +declare @llvm.fptoui.sat.nxv4f64.nxv4i16() +declare @llvm.fptoui.sat.nxv8f64.nxv8i16() +declare @llvm.fptoui.sat.nxv2f64.nxv2i64() +declare @llvm.fptoui.sat.nxv4f64.nxv4i64() + +define @test_signed_v2f64_v2i32( %f) { +; CHECK-LABEL: test_signed_v2f64_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #281474974613504 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movk x8, #16879, lsl #48 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z1.d +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0xffffffff +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p2/m, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f64.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f64_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f64_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #281474974613504 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movk x8, #16879, lsl #48 +; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: movprfx z3, z1 +; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z2.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d +; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z0.d, #0xffffffff +; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z2.d, p2, z0.d, z3.d +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z2.s +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f64.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f64_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f64_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #281474974613504 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movk x8, #16879, lsl #48 +; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: fcvtzu z5.d, p0/m, z1.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: 
movprfx z6, z0 +; CHECK-NEXT: fcvtzu z6.d, p0/m, z0.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z4.d +; CHECK-NEXT: mov z1.d, #0xffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z4.d +; CHECK-NEXT: sel z0.d, p2, z1.d, z5.d +; CHECK-NEXT: fcmge p2.d, p0/z, z3.d, #0.0 +; CHECK-NEXT: sel z5.d, p1, z1.d, z6.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z4.d +; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0 +; CHECK-NEXT: movprfx z6, z2 +; CHECK-NEXT: fcvtzu z6.d, p0/m, z2.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcmgt p0.d, p0/z, z2.d, z4.d +; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0 +; CHECK-NEXT: sel z2.d, p1, z1.d, z3.d +; CHECK-NEXT: sel z1.d, p0, z1.d, z6.d +; CHECK-NEXT: uzp1 z0.s, z5.s, z0.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f64.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f64_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f64_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #281337537757184 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movk x8, #16623, lsl #48 +; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: movprfx z3, z1 +; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z2.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d +; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z0.d, #65535 // =0xffff +; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z2.d, p2, z0.d, z3.d +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z2.s +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f64.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f64_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f64_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #281337537757184 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movk x8, #16623, lsl #48 +; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0 +; CHECK-NEXT: movprfx z5, z3 +; CHECK-NEXT: fcvtzu z5.d, p0/m, z3.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.d, p0/z, z2.d, #0.0 +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: movprfx z6, z2 +; CHECK-NEXT: fcvtzu z6.d, p0/m, z2.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z3.d, z4.d +; CHECK-NEXT: mov z3.d, #65535 // =0xffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.d, p0/z, z2.d, z4.d +; CHECK-NEXT: sel z2.d, p2, z3.d, z5.d +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: sel z5.d, p1, z3.d, z6.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d +; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: movprfx z6, z0 +; CHECK-NEXT: fcvtzu z6.d, p0/m, z0.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d +; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z3.d, z1.d +; CHECK-NEXT: sel z1.d, p0, z3.d, z6.d +; CHECK-NEXT: uzp1 z2.s, z5.s, z2.s +; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f64.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f64_v2i64( %f) { +; 
CHECK-LABEL: test_signed_v2f64_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #4895412794951729151 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f64.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f64_v4i64( %f) { +; CHECK-LABEL: test_signed_v4f64_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #4895412794951729151 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: movprfx z3, z1 +; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z4.d +; CHECK-NEXT: fcmgt p0.d, p0/z, z1.d, z4.d +; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p1/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z3.d, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f64.nxv4i64( %f) + ret %x +} + + +; half + +declare @llvm.fptoui.sat.nxv2f16.nxv2i32() +declare @llvm.fptoui.sat.nxv4f16.nxv4i32() +declare @llvm.fptoui.sat.nxv8f16.nxv8i32() +declare @llvm.fptoui.sat.nxv4f16.nxv4i16() +declare @llvm.fptoui.sat.nxv8f16.nxv8i16() +declare @llvm.fptoui.sat.nxv2f16.nxv2i64() +declare @llvm.fptoui.sat.nxv4f16.nxv4i64() + +define @test_signed_v2f16_v2i32( %f) { +; CHECK-LABEL: test_signed_v2f16_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI14_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z0.d, #0xffffffff +; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f16.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI15_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI15_0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8] +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f16.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f16_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f16_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI16_0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uunpklo z3.s, z0.h +; CHECK-NEXT: uunpkhi z4.s, z0.h +; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, #0.0 +; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8] +; CHECK-NEXT: fcmge p2.h, p0/z, z4.h, #0.0 +; CHECK-NEXT: movprfx z0, z3 +; CHECK-NEXT: 
fcvtzu z0.s, p0/m, z3.h +; CHECK-NEXT: movprfx z1, z4 +; CHECK-NEXT: fcvtzu z1.s, p0/m, z4.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.s, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.h, p0/z, z3.h, z2.h +; CHECK-NEXT: fcmgt p0.h, p0/z, z4.h, z2.h +; CHECK-NEXT: mov z0.s, p1/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f16.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f16.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f16_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f16_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI18_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: ld1rh { z2.h }, p0/z, [x8] +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzu z1.h, p0/m, z0.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z1.h, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f16.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f16_v2i64( %f) { +; CHECK-LABEL: test_signed_v2f16_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI19_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f16.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f16_v4i64( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI20_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: uunpkhi z4.d, z0.s +; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, #0.0 +; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] +; CHECK-NEXT: fcmge p2.h, p0/z, z4.h, #0.0 +; CHECK-NEXT: movprfx z0, z3 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z3.h +; CHECK-NEXT: movprfx z1, z4 +; CHECK-NEXT: fcvtzu z1.d, p0/m, z4.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.h, p0/z, z3.h, z2.h +; CHECK-NEXT: fcmgt p0.h, p0/z, z4.h, z2.h +; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f16.nxv4i64( %f) + ret %x +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll 
b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll @@ -0,0 +1,952 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK64 + +; Float + +declare @llvm.fptosi.sat.nxv2f32.nxv2i32() +declare @llvm.fptosi.sat.nxv4f32.nxv4i32() +declare @llvm.fptosi.sat.nxv8f32.nxv8i32() +declare @llvm.fptosi.sat.nxv4f32.nxv4i16() +declare @llvm.fptosi.sat.nxv8f32.nxv8i16() +declare @llvm.fptosi.sat.nxv2f32.nxv2i64() +declare @llvm.fptosi.sat.nxv4f32.nxv4i64() + +define @test_signed_v2f32_v2i32( %f) { +; CHECK32-LABEL: test_signed_v2f32_v2i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK32-NEXT: flw ft0, %lo(.LCPI0_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: lui a0, %hi(.LCPI0_1) +; CHECK32-NEXT: flw ft0, %lo(.LCPI0_1)(a0) +; CHECK32-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vmfne.vv v8, v8, v8 +; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK32-NEXT: vmv.v.v v0, v8 +; CHECK32-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f32_v2i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK64-NEXT: flw ft0, %lo(.LCPI0_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI0_1) +; CHECK64-NEXT: flw ft0, %lo(.LCPI0_1)(a0) +; CHECK64-NEXT: li a0, 1 +; CHECK64-NEXT: slli a0, a0, 31 +; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: lui a0, 524288 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vmfne.vv v8, v8, v8 +; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK64-NEXT: vmv.v.v v0, v8 +; CHECK64-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f32.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f32_v4i32( %f) { +; CHECK32-LABEL: test_signed_v4f32_v4i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK32-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK32-NEXT: vmfge.vf v10, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v10 +; CHECK32-NEXT: lui a0, %hi(.LCPI1_1) +; CHECK32-NEXT: flw ft0, %lo(.LCPI1_1)(a0) +; CHECK32-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: vmerge.vxm v12, v10, a0, v0 +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vmfne.vv v10, v8, v8 +; CHECK32-NEXT: vmerge.vxm v8, v12, a0, v0 +; CHECK32-NEXT: vmv1r.v v0, v10 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f32_v4i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK64-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK64-NEXT: vmfge.vf v10, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v10 +; CHECK64-NEXT: vfcvt.rtz.x.f.v v10, v8 +; 
CHECK64-NEXT: lui a0, %hi(.LCPI1_1) +; CHECK64-NEXT: flw ft0, %lo(.LCPI1_1)(a0) +; CHECK64-NEXT: li a0, 1 +; CHECK64-NEXT: slli a0, a0, 31 +; CHECK64-NEXT: vmerge.vxm v12, v10, a0, v0 +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: lui a0, 524288 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vmfne.vv v10, v8, v8 +; CHECK64-NEXT: vmerge.vxm v8, v12, a0, v0 +; CHECK64-NEXT: vmv1r.v v0, v10 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f32.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f32_v8i32( %f) { +; CHECK32-LABEL: test_signed_v8f32_v8i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK32-NEXT: flw ft0, %lo(.LCPI2_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK32-NEXT: vmfge.vf v12, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v12 +; CHECK32-NEXT: lui a0, %hi(.LCPI2_1) +; CHECK32-NEXT: flw ft0, %lo(.LCPI2_1)(a0) +; CHECK32-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: vmerge.vxm v16, v12, a0, v0 +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vmfne.vv v12, v8, v8 +; CHECK32-NEXT: vmerge.vxm v8, v16, a0, v0 +; CHECK32-NEXT: vmv1r.v v0, v12 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v8f32_v8i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK64-NEXT: flw ft0, %lo(.LCPI2_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK64-NEXT: vmfge.vf v12, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v12 +; CHECK64-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI2_1) +; CHECK64-NEXT: flw ft0, %lo(.LCPI2_1)(a0) +; CHECK64-NEXT: li a0, 1 +; CHECK64-NEXT: slli a0, a0, 31 +; CHECK64-NEXT: vmerge.vxm v16, v12, a0, v0 +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: lui a0, 524288 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vmfne.vv v12, v8, v8 +; CHECK64-NEXT: vmerge.vxm v8, v16, a0, v0 +; CHECK64-NEXT: vmv1r.v v0, v12 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f32.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f32_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI3_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI3_1)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfmax.vf v10, v8, ft0 +; CHECK-NEXT: vfmin.vf v10, v10, ft1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f32.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f32_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f32_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI4_1) +; CHECK-NEXT: flw ft1, %lo(.LCPI4_1)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfmax.vf v12, v8, ft0 +; CHECK-NEXT: vfmin.vf v12, v12, ft1 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 +; CHECK-NEXT: ret + %x = call 
@llvm.fptosi.sat.nxv8f32.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f32_v2i64( %f) { +; CHECK32-LABEL: test_signed_v2f32_v2i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: sw zero, 8(sp) +; CHECK32-NEXT: li a1, -1 +; CHECK32-NEXT: sw a1, 8(sp) +; CHECK32-NEXT: lui a1, %hi(.LCPI5_0) +; CHECK32-NEXT: flw ft0, %lo(.LCPI5_0)(a1) +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: addi a0, sp, 8 +; CHECK32-NEXT: vlse64.v v10, (a0), zero +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmerge.vvm v10, v12, v10, v0 +; CHECK32-NEXT: lui a1, %hi(.LCPI5_1) +; CHECK32-NEXT: flw ft0, %lo(.LCPI5_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vlse64.v v12, (a0), zero +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f32_v2i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK64-NEXT: flw ft0, %lo(.LCPI5_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; CHECK64-NEXT: li a0, -1 +; CHECK64-NEXT: lui a1, %hi(.LCPI5_1) +; CHECK64-NEXT: flw ft0, %lo(.LCPI5_1)(a1) +; CHECK64-NEXT: slli a1, a0, 63 +; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmerge.vxm v10, v10, a1, v0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: srli a0, a0, 1 +; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f32.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f32_v4i64( %f) { +; CHECK32-LABEL: test_signed_v4f32_v4i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: sw zero, 8(sp) +; CHECK32-NEXT: li a1, -1 +; CHECK32-NEXT: sw a1, 8(sp) +; CHECK32-NEXT: lui a1, %hi(.LCPI6_0) +; CHECK32-NEXT: flw ft0, %lo(.LCPI6_0)(a1) +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK32-NEXT: vmfge.vf v10, v8, ft0 +; CHECK32-NEXT: addi a0, sp, 8 +; CHECK32-NEXT: vlse64.v v12, (a0), zero +; CHECK32-NEXT: vmnot.m v0, v10 +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v16, v8 +; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmerge.vvm v12, v16, v12, v0 +; CHECK32-NEXT: lui a1, %hi(.LCPI6_1) +; CHECK32-NEXT: flw ft0, %lo(.LCPI6_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vlse64.v v16, (a0), zero +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; 
CHECK32-NEXT: vmerge.vvm v12, v12, v16, v0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f32_v4i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI6_0) +; CHECK64-NEXT: flw ft0, %lo(.LCPI6_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK64-NEXT: vmfge.vf v10, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v10 +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK64-NEXT: li a0, -1 +; CHECK64-NEXT: lui a1, %hi(.LCPI6_1) +; CHECK64-NEXT: flw ft0, %lo(.LCPI6_1)(a1) +; CHECK64-NEXT: slli a1, a0, 63 +; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmerge.vxm v12, v12, a1, v0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: srli a0, a0, 1 +; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f32.nxv4i64( %f) + ret %x +} + +; Double + +declare @llvm.fptosi.sat.nxv2f64.nxv2i32() +declare @llvm.fptosi.sat.nxv4f64.nxv4i32() +declare @llvm.fptosi.sat.nxv8f64.nxv8i32() +declare @llvm.fptosi.sat.nxv4f64.nxv4i16() +declare @llvm.fptosi.sat.nxv8f64.nxv8i16() +declare @llvm.fptosi.sat.nxv2f64.nxv2i64() +declare @llvm.fptosi.sat.nxv4f64.nxv4i64() + +define @test_signed_v2f64_v2i32( %f) { +; CHECK-LABEL: test_signed_v2f64_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI7_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI7_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI7_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI7_1)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vfmax.vf v10, v8, ft0 +; CHECK-NEXT: vfmin.vf v10, v10, ft1 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f64.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f64_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f64_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI8_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI8_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI8_1)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vfmax.vf v12, v8, ft0 +; CHECK-NEXT: vfmin.vf v12, v12, ft1 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f64.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f64_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f64_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI9_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI9_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI9_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI9_1)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vfmax.vf v16, v8, ft0 +; CHECK-NEXT: vfmin.vf v16, v16, ft1 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, 
mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vim v8, v24, 0, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f64.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f64_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f64_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI10_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI10_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI10_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI10_1)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vfmax.vf v12, v8, ft0 +; CHECK-NEXT: vfmin.vf v12, v12, ft1 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vncvt.x.x.w v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f64.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f64_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f64_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI11_1) +; CHECK-NEXT: fld ft1, %lo(.LCPI11_1)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vfmax.vf v16, v8, ft0 +; CHECK-NEXT: vfmin.vf v16, v16, ft1 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vncvt.x.x.w v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f64.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f64_v2i64( %f) { +; CHECK32-LABEL: test_signed_v2f64_v2i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: sw zero, 8(sp) +; CHECK32-NEXT: li a1, -1 +; CHECK32-NEXT: sw a1, 8(sp) +; CHECK32-NEXT: lui a1, %hi(.LCPI12_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI12_0)(a1) +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmfge.vf v10, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v10 +; CHECK32-NEXT: addi a0, sp, 8 +; CHECK32-NEXT: vlse64.v v10, (a0), zero +; CHECK32-NEXT: lui a1, %hi(.LCPI12_1) +; CHECK32-NEXT: fld ft0, %lo(.LCPI12_1)(a1) +; CHECK32-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK32-NEXT: vlse64.v v14, (a0), zero +; CHECK32-NEXT: vmerge.vvm v12, v12, v10, v0 +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: vmfne.vv v10, v8, v8 +; CHECK32-NEXT: vmerge.vvm v8, v12, v14, v0 +; CHECK32-NEXT: vmv1r.v v0, v10 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f64_v2i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI12_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmfge.vf v10, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v10 +; CHECK64-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI12_1) +; CHECK64-NEXT: fld ft0, %lo(.LCPI12_1)(a0) +; CHECK64-NEXT: li a0, -1 +; CHECK64-NEXT: slli a1, a0, 
63 +; CHECK64-NEXT: vmerge.vxm v12, v10, a1, v0 +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: srli a0, a0, 1 +; CHECK64-NEXT: vmfne.vv v10, v8, v8 +; CHECK64-NEXT: vmerge.vxm v8, v12, a0, v0 +; CHECK64-NEXT: vmv1r.v v0, v10 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f64.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f64_v4i64( %f) { +; CHECK32-LABEL: test_signed_v4f64_v4i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: sw zero, 8(sp) +; CHECK32-NEXT: li a1, -1 +; CHECK32-NEXT: sw a1, 8(sp) +; CHECK32-NEXT: lui a1, %hi(.LCPI13_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI13_0)(a1) +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmfge.vf v12, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v12 +; CHECK32-NEXT: addi a0, sp, 8 +; CHECK32-NEXT: vlse64.v v12, (a0), zero +; CHECK32-NEXT: lui a1, %hi(.LCPI13_1) +; CHECK32-NEXT: fld ft0, %lo(.LCPI13_1)(a1) +; CHECK32-NEXT: vfcvt.rtz.x.f.v v16, v8 +; CHECK32-NEXT: vlse64.v v20, (a0), zero +; CHECK32-NEXT: vmerge.vvm v16, v16, v12, v0 +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: vmfne.vv v12, v8, v8 +; CHECK32-NEXT: vmerge.vvm v8, v16, v20, v0 +; CHECK32-NEXT: vmv1r.v v0, v12 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f64_v4i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmfge.vf v12, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v12 +; CHECK64-NEXT: vfcvt.rtz.x.f.v v12, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI13_1) +; CHECK64-NEXT: fld ft0, %lo(.LCPI13_1)(a0) +; CHECK64-NEXT: li a0, -1 +; CHECK64-NEXT: slli a1, a0, 63 +; CHECK64-NEXT: vmerge.vxm v16, v12, a1, v0 +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: srli a0, a0, 1 +; CHECK64-NEXT: vmfne.vv v12, v8, v8 +; CHECK64-NEXT: vmerge.vxm v8, v16, a0, v0 +; CHECK64-NEXT: vmv1r.v v0, v12 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f64.nxv4i64( %f) + ret %x +} + + +; half + +declare @llvm.fptosi.sat.nxv2f16.nxv2i32() +declare @llvm.fptosi.sat.nxv4f16.nxv4i32() +declare @llvm.fptosi.sat.nxv8f16.nxv8i32() +declare @llvm.fptosi.sat.nxv4f16.nxv4i16() +declare @llvm.fptosi.sat.nxv8f16.nxv8i16() +declare @llvm.fptosi.sat.nxv2f16.nxv2i64() +declare @llvm.fptosi.sat.nxv4f16.nxv4i64() + +define @test_signed_v2f16_v2i32( %f) { +; CHECK32-LABEL: test_signed_v2f16_v2i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI14_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: lui a1, %hi(.LCPI14_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI14_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, 
e32, m1, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f16_v2i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI14_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI14_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI14_1)(a0) +; CHECK64-NEXT: li a0, 1 +; CHECK64-NEXT: slli a0, a0, 31 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: lui a0, 524288 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f16.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i32( %f) { +; CHECK32-LABEL: test_signed_v4f16_v4i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI15_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI15_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: lui a1, %hi(.LCPI15_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI15_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f16_v4i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI15_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI15_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI15_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI15_1)(a0) +; CHECK64-NEXT: li a0, 1 +; CHECK64-NEXT: slli a0, a0, 31 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: lui a0, 524288 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f16.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f16_v8i32( %f) { +; CHECK32-LABEL: test_signed_v8f16_v8i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI16_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK32-NEXT: vmfge.vf v10, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v10 +; 
CHECK32-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: lui a1, %hi(.LCPI16_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI16_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK32-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK32-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v8f16_v8i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI16_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK64-NEXT: vmfge.vf v10, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v10 +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK64-NEXT: lui a0, %hi(.LCPI16_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI16_1)(a0) +; CHECK64-NEXT: li a0, 1 +; CHECK64-NEXT: slli a0, a0, 31 +; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: lui a0, 524288 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f16.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i16( %f) { +; CHECK32-LABEL: test_signed_v4f16_v4i16: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI17_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: lui a0, %hi(.LCPI17_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI17_1)(a0) +; CHECK32-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK32-NEXT: lui a0, 8 +; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vmfne.vv v8, v8, v8 +; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK32-NEXT: vmv.v.v v0, v8 +; CHECK32-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f16_v4i16: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI17_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: lui a0, %hi(.LCPI17_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI17_1)(a0) +; CHECK64-NEXT: vfcvt.rtz.x.f.v v9, v8 +; CHECK64-NEXT: lui a0, 8 +; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vmfne.vv v8, v8, v8 +; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK64-NEXT: vmv.v.v v0, v8 +; CHECK64-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f16.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f16_v8i16( %f) { +; CHECK32-LABEL: test_signed_v8f16_v8i16: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI18_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK32-NEXT: 
vmfge.vf v10, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v10 +; CHECK32-NEXT: lui a0, %hi(.LCPI18_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI18_1)(a0) +; CHECK32-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK32-NEXT: lui a0, 8 +; CHECK32-NEXT: vmerge.vxm v12, v10, a0, v0 +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: vmfne.vv v10, v8, v8 +; CHECK32-NEXT: vmerge.vxm v8, v12, a0, v0 +; CHECK32-NEXT: vmv1r.v v0, v10 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v8f16_v8i16: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI18_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK64-NEXT: vmfge.vf v10, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v10 +; CHECK64-NEXT: lui a0, %hi(.LCPI18_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI18_1)(a0) +; CHECK64-NEXT: vfcvt.rtz.x.f.v v10, v8 +; CHECK64-NEXT: lui a0, 8 +; CHECK64-NEXT: vmerge.vxm v12, v10, a0, v0 +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: addiw a0, a0, -1 +; CHECK64-NEXT: vmfne.vv v10, v8, v8 +; CHECK64-NEXT: vmerge.vxm v8, v12, a0, v0 +; CHECK64-NEXT: vmv1r.v v0, v10 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv8f16.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f16_v2i64( %f) { +; CHECK32-LABEL: test_signed_v2f16_v2i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: sw zero, 8(sp) +; CHECK32-NEXT: li a1, -1 +; CHECK32-NEXT: sw a1, 8(sp) +; CHECK32-NEXT: lui a1, %hi(.LCPI19_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI19_0)(a1) +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: addi a0, sp, 8 +; CHECK32-NEXT: vlse64.v v10, (a0), zero +; CHECK32-NEXT: vfwcvt.f.f.v v9, v8 +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v12, v9 +; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmerge.vvm v10, v12, v10, v0 +; CHECK32-NEXT: lui a1, %hi(.LCPI19_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI19_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vlse64.v v12, (a0), zero +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f16_v2i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI19_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: vfwcvt.f.f.v v9, v8 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v10, v9 +; CHECK64-NEXT: li a0, -1 +; CHECK64-NEXT: lui a1, %hi(.LCPI19_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI19_1)(a1) +; CHECK64-NEXT: slli a1, a0, 63 +; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmerge.vxm v10, v10, a1, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: srli a0, a0, 1 +; CHECK64-NEXT: vsetvli 
zero, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv2f16.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f16_v4i64( %f) { +; CHECK32-LABEL: test_signed_v4f16_v4i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: addi sp, sp, -16 +; CHECK32-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-NEXT: lui a0, 524288 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: sw zero, 8(sp) +; CHECK32-NEXT: li a1, -1 +; CHECK32-NEXT: sw a1, 8(sp) +; CHECK32-NEXT: lui a1, %hi(.LCPI20_0) +; CHECK32-NEXT: flh ft0, %lo(.LCPI20_0)(a1) +; CHECK32-NEXT: addi a0, a0, -1 +; CHECK32-NEXT: sw a0, 12(sp) +; CHECK32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfge.vf v9, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v9 +; CHECK32-NEXT: addi a0, sp, 8 +; CHECK32-NEXT: vlse64.v v12, (a0), zero +; CHECK32-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vfwcvt.rtz.x.f.v v16, v10 +; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmerge.vvm v12, v16, v12, v0 +; CHECK32-NEXT: lui a1, %hi(.LCPI20_1) +; CHECK32-NEXT: flh ft0, %lo(.LCPI20_1)(a1) +; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK32-NEXT: vlse64.v v16, (a0), zero +; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmerge.vvm v12, v12, v16, v0 +; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK32-NEXT: vmfne.vv v0, v8, v8 +; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK32-NEXT: addi sp, sp, 16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f16_v4i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK64-NEXT: flh ft0, %lo(.LCPI20_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfge.vf v9, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v9 +; CHECK64-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vfwcvt.rtz.x.f.v v12, v10 +; CHECK64-NEXT: li a0, -1 +; CHECK64-NEXT: lui a1, %hi(.LCPI20_1) +; CHECK64-NEXT: flh ft0, %lo(.LCPI20_1)(a1) +; CHECK64-NEXT: slli a1, a0, 63 +; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmerge.vxm v12, v12, a1, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 +; CHECK64-NEXT: srli a0, a0, 1 +; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK64-NEXT: vmfne.vv v0, v8, v8 +; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptosi.sat.nxv4f16.nxv4i64( %f) + ret %x +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll @@ -0,0 +1,524 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,CHECK64 + +; Float + +declare @llvm.fptoui.sat.nxv2f32.nxv2i32() +declare @llvm.fptoui.sat.nxv4f32.nxv4i32() +declare @llvm.fptoui.sat.nxv8f32.nxv8i32() +declare @llvm.fptoui.sat.nxv4f32.nxv4i16() +declare @llvm.fptoui.sat.nxv8f32.nxv8i16() +declare @llvm.fptoui.sat.nxv2f32.nxv2i64() +declare @llvm.fptoui.sat.nxv4f32.nxv4i64() + +define @test_signed_v2f32_v2i32( %f) { +; CHECK-LABEL: test_signed_v2f32_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f32.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f32_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: vmfge.vf v11, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v11 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f32.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f32_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f32_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vmfgt.vf v12, v8, ft0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: vmfge.vf v13, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v13 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f32.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f32_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: fmv.w.x ft1, zero +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, ft1 +; CHECK-NEXT: vfmin.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v10 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f32.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f32_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f32_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: fmv.w.x ft1, zero +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, ft1 +; CHECK-NEXT: vfmin.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v12 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f32.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f32_v2i64( %f) { +; CHECK-LABEL: test_signed_v2f32_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: 
vfwcvt.rtz.xu.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v10, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f32.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f32_v4i64( %f) { +; CHECK-LABEL: test_signed_v4f32_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI6_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: vmfge.vf v11, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v11 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vim v12, v12, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v8, v12, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f32.nxv4i64( %f) + ret %x +} + +; Double + +declare @llvm.fptoui.sat.nxv2f64.nxv2i32() +declare @llvm.fptoui.sat.nxv4f64.nxv4i32() +declare @llvm.fptoui.sat.nxv8f64.nxv8i32() +declare @llvm.fptoui.sat.nxv4f64.nxv4i16() +declare @llvm.fptoui.sat.nxv8f64.nxv8i16() +declare @llvm.fptoui.sat.nxv2f64.nxv2i64() +declare @llvm.fptoui.sat.nxv4f64.nxv4i64() + +define @test_signed_v2f64_v2i32( %f) { +; CHECK32-LABEL: test_signed_v2f64_v2i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI7_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI7_0)(a0) +; CHECK32-NEXT: fcvt.d.w ft1, zero +; CHECK32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK32-NEXT: vfmax.vf v8, v8, ft1 +; CHECK32-NEXT: vfmin.vf v10, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK32-NEXT: vfncvt.rtz.xu.f.w v8, v10 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f64_v2i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI7_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI7_0)(a0) +; CHECK64-NEXT: fmv.d.x ft1, zero +; CHECK64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK64-NEXT: vfmax.vf v8, v8, ft1 +; CHECK64-NEXT: vfmin.vf v10, v8, ft0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK64-NEXT: vfncvt.rtz.xu.f.w v8, v10 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f64.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f64_v4i32( %f) { +; CHECK32-LABEL: test_signed_v4f64_v4i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI8_0)(a0) +; CHECK32-NEXT: fcvt.d.w ft1, zero +; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK32-NEXT: vfmax.vf v8, v8, ft1 +; CHECK32-NEXT: vfmin.vf v12, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vfncvt.rtz.xu.f.w v8, v12 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f64_v4i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI8_0)(a0) +; CHECK64-NEXT: fmv.d.x ft1, zero +; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK64-NEXT: vfmax.vf v8, v8, ft1 +; CHECK64-NEXT: vfmin.vf v12, v8, ft0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vfncvt.rtz.xu.f.w v8, v12 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f64.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f64_v8i32( %f) { +; CHECK32-LABEL: test_signed_v8f64_v8i32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI9_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI9_0)(a0) +; CHECK32-NEXT: fcvt.d.w ft1, zero +; CHECK32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK32-NEXT: vfmax.vf v8, v8, ft1 +; CHECK32-NEXT: vfmin.vf v16, v8, ft0 +; CHECK32-NEXT: vsetvli 
zero, zero, e32, m4, ta, mu +; CHECK32-NEXT: vfncvt.rtz.xu.f.w v8, v16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v8f64_v8i32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI9_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI9_0)(a0) +; CHECK64-NEXT: fmv.d.x ft1, zero +; CHECK64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK64-NEXT: vfmax.vf v8, v8, ft1 +; CHECK64-NEXT: vfmin.vf v16, v8, ft0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK64-NEXT: vfncvt.rtz.xu.f.w v8, v16 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f64.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f64_v4i16( %f) { +; CHECK32-LABEL: test_signed_v4f64_v4i16: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI10_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI10_0)(a0) +; CHECK32-NEXT: fcvt.d.w ft1, zero +; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK32-NEXT: vfmax.vf v8, v8, ft1 +; CHECK32-NEXT: vfmin.vf v8, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK32-NEXT: vncvt.x.x.w v8, v12 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f64_v4i16: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI10_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI10_0)(a0) +; CHECK64-NEXT: fmv.d.x ft1, zero +; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK64-NEXT: vfmax.vf v8, v8, ft1 +; CHECK64-NEXT: vfmin.vf v8, v8, ft0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK64-NEXT: vncvt.x.x.w v8, v12 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f64.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f64_v8i16( %f) { +; CHECK32-LABEL: test_signed_v8f64_v8i16: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; CHECK32-NEXT: fcvt.d.w ft1, zero +; CHECK32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK32-NEXT: vfmax.vf v8, v8, ft1 +; CHECK32-NEXT: vfmin.vf v8, v8, ft0 +; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK32-NEXT: vfncvt.rtz.xu.f.w v16, v8 +; CHECK32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK32-NEXT: vncvt.x.x.w v8, v16 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v8f64_v8i16: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; CHECK64-NEXT: fmv.d.x ft1, zero +; CHECK64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK64-NEXT: vfmax.vf v8, v8, ft1 +; CHECK64-NEXT: vfmin.vf v8, v8, ft0 +; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK64-NEXT: vfncvt.rtz.xu.f.w v16, v8 +; CHECK64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK64-NEXT: vncvt.x.x.w v8, v16 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f64.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f64_v2i64( %f) { +; CHECK32-LABEL: test_signed_v2f64_v2i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI12_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK32-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK32-NEXT: fcvt.d.w ft0, zero +; CHECK32-NEXT: vmfge.vf v11, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v11 +; CHECK32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: vmv1r.v v0, v10 +; CHECK32-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v2f64_v2i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI12_0) +; 
CHECK64-NEXT: fld ft0, %lo(.LCPI12_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK64-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK64-NEXT: fmv.d.x ft0, zero +; CHECK64-NEXT: vmfge.vf v11, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v11 +; CHECK64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: vmv1r.v v0, v10 +; CHECK64-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f64.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f64_v4i64( %f) { +; CHECK32-LABEL: test_signed_v4f64_v4i64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK32-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK32-NEXT: vmfgt.vf v12, v8, ft0 +; CHECK32-NEXT: fcvt.d.w ft0, zero +; CHECK32-NEXT: vmfge.vf v13, v8, ft0 +; CHECK32-NEXT: vmnot.m v0, v13 +; CHECK32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK32-NEXT: vmv1r.v v0, v12 +; CHECK32-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK32-NEXT: ret +; +; CHECK64-LABEL: test_signed_v4f64_v4i64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK64-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK64-NEXT: vmfgt.vf v12, v8, ft0 +; CHECK64-NEXT: fmv.d.x ft0, zero +; CHECK64-NEXT: vmfge.vf v13, v8, ft0 +; CHECK64-NEXT: vmnot.m v0, v13 +; CHECK64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK64-NEXT: vmv1r.v v0, v12 +; CHECK64-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK64-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f64.nxv4i64( %f) + ret %x +} + + +; half + +declare @llvm.fptoui.sat.nxv2f16.nxv2i32() +declare @llvm.fptoui.sat.nxv4f16.nxv4i32() +declare @llvm.fptoui.sat.nxv8f16.nxv8i32() +declare @llvm.fptoui.sat.nxv4f16.nxv4i16() +declare @llvm.fptoui.sat.nxv8f16.nxv8i16() +declare @llvm.fptoui.sat.nxv2f16.nxv2i64() +declare @llvm.fptoui.sat.nxv4f16.nxv4i64() + +define @test_signed_v2f16_v2i32( %f) { +; CHECK-LABEL: test_signed_v2f16_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f16.nxv2i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i32( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI15_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI15_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v10, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f16.nxv4i32( %f) + ret %x +} + +define @test_signed_v8f16_v8i32( %f) { +; CHECK-LABEL: test_signed_v8f16_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI16_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmfgt.vf v10, v8, ft0 +; 
CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v11, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v11 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vim v12, v12, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v8, v12, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f16.nxv8i32( %f) + ret %x +} + +define @test_signed_v4f16_v4i16( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI17_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f16.nxv4i16( %f) + ret %x +} + +define @test_signed_v8f16_v8i16( %f) { +; CHECK-LABEL: test_signed_v8f16_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI18_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmfgt.vf v10, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v11, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v11 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv8f16.nxv8i16( %f) + ret %x +} + +define @test_signed_v2f16_v2i64( %f) { +; CHECK-LABEL: test_signed_v2f16_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI19_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vim v10, v12, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v10, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv2f16.nxv2i64( %f) + ret %x +} + +define @test_signed_v4f16_v4i64( %f) { +; CHECK-LABEL: test_signed_v4f16_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI20_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfgt.vf v9, v8, ft0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: vmfge.vf v10, v8, ft0 +; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vim v12, v12, 0, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v12, -1, v0 +; CHECK-NEXT: ret + %x = call @llvm.fptoui.sat.nxv4f16.nxv4i64( %f) + ret %x +} +
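For reference, a standalone sketch of one of the conversions exercised above, with the scalable-vector types written out explicitly. This is illustrative only: the @sketch_v2f32_v2i64 name is hypothetical, and the intrinsic overload suffix simply mirrors the declarations used in these tests rather than anything taken from the patch itself.

; Sketch: a single saturating float-to-signed-int conversion on scalable vectors.
; The nxv2f32.nxv2i64 suffix follows the naming used in the test declarations above.
declare <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)

define <vscale x 2 x i64> @sketch_v2f32_v2i64(<vscale x 2 x float> %f) {
  %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
  ret <vscale x 2 x i64> %x
}

Feeding a file like this through the llc invocations from the RUN lines above exercises the same lowering these checks cover.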