Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1479,6 +1479,17 @@ if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 64) setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); + // Signalling comparison has to be expanded as there's no signalling version + // of the vector comparison instructions. + setOperationAction(ISD::STRICT_FSETCCS, VT, Expand); + // FIXME: We could potentially make use of the vector comparison instructions + // for STRICT_FSETCC, but some kinds of comparison require more than one + // FCM instruction which wouldn't be valid so would need to get expanded + // instead. The lowering also involves target-specific ISD nodes so we would + // likely need to add strict versions of all of them and handle them + // appropriately. + setOperationAction(ISD::STRICT_FSETCC, VT, Expand); + if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { @@ -3381,7 +3392,8 @@ // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. - EVT InVT = Op.getOperand(0).getValueType(); + bool IsStrict = Op->isStrictFPOpcode(); + EVT InVT = Op.getOperand(IsStrict ? 
1 : 0).getValueType(); EVT VT = Op.getValueType(); if (VT.isScalableVector()) { @@ -3401,6 +3413,13 @@ !Subtarget->hasFullFP16()) { MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); SDLoc dl(Op); + if (IsStrict) { + SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, + {NewVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } return DAG.getNode( Op.getOpcode(), dl, Op.getValueType(), DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); @@ -3410,6 +3429,13 @@ uint64_t InVTSize = InVT.getFixedSizeInBits(); if (VTSize < InVTSize) { SDLoc dl(Op); + if (IsStrict) { + InVT = InVT.changeVectorElementTypeToInteger(); + SDValue Cv = DAG.getNode(Op.getOpcode(), dl, {InVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); + return DAG.getMergeValues({Trunc, Cv.getValue(1)}, dl); + } SDValue Cv = DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), Op.getOperand(0)); @@ -3421,10 +3447,32 @@ MVT ExtVT = MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()), VT.getVectorNumElements()); + if (IsStrict) { + SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, + {ExtVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0)); return DAG.getNode(Op.getOpcode(), dl, VT, Ext); } + // Use a scalar operation for conversions between single-element vectors of + // the same size. + if (NumElts == 1) { + SDLoc dl(Op); + SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InVT.getScalarType(), + Op.getOperand(IsStrict ? 
1 : 0), + DAG.getConstant(0, dl, MVT::i64)); + EVT ScalarVT = VT.getScalarType(); + if (IsStrict) + return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other}, + {Op.getOperand(0), Extract}); + return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract); + } + // Type changing conversions are illegal. return Op; } @@ -3587,9 +3635,10 @@ // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. + bool IsStrict = Op->isStrictFPOpcode(); EVT VT = Op.getValueType(); SDLoc dl(Op); - SDValue In = Op.getOperand(0); + SDValue In = Op.getOperand(IsStrict ? 1 : 0); EVT InVT = In.getValueType(); unsigned Opc = Op.getOpcode(); bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP; @@ -3617,6 +3666,13 @@ MVT CastVT = MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), InVT.getVectorNumElements()); + if (IsStrict) { + In = DAG.getNode(Opc, dl, {CastVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode( + ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other}, + {In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)}); + } In = DAG.getNode(Opc, dl, CastVT, In); return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl)); } @@ -3625,9 +3681,26 @@ unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; EVT CastVT = VT.changeVectorElementTypeToInteger(); In = DAG.getNode(CastOpc, dl, CastVT, In); + if (IsStrict) + return DAG.getNode(Opc, dl, {VT, MVT::Other}, + {Op.getOperand(0), In}); return DAG.getNode(Opc, dl, VT, In); } + // Use a scalar operation for conversions between single-element vectors of + // the same size. + if (VT.getVectorNumElements() == 1) { + SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InVT.getScalarType(), + Op.getOperand(IsStrict ? 
1 : 0), + DAG.getConstant(0, dl, MVT::i64)); + EVT ScalarVT = VT.getScalarType(); + if (IsStrict) + return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other}, + {Op.getOperand(0), Extract}); + return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract); + } + return Op; } Index: llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll @@ -0,0 +1,886 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-eabi %s -disable-strictnode-mutation -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - | FileCheck %s + +; Check that constrained fp vector intrinsics are correctly lowered. + + +; Single-precision intrinsics + +define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) #0 { +; CHECK-LABEL: add_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) #0 { +; CHECK-LABEL: sub_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) #0 { +; CHECK-LABEL: mul_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val 
+} + +define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) #0 { +; CHECK-LABEL: div_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @fma_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 { +; CHECK-LABEL: fma_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x i32> @fptosi_v4i32_v4f32(<4 x float> %x) #0 { +; CHECK-LABEL: fptosi_v4i32_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 + ret <4 x i32> %val +} + +define <4 x i32> @fptoui_v4i32_v4f32(<4 x float> %x) #0 { +; CHECK-LABEL: fptoui_v4i32_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 + ret <4 x i32> %val +} + +define <4 x i64> @fptosi_v4i64_v4f32(<4 x float> %x) #0 { +; CHECK-LABEL: fptosi_v4i64_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: ret + %val = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 + ret <4 x i64> %val +} + +define <4 x i64> @fptoui_v4i64_v4f32(<4 x float> %x) #0 { +; CHECK-LABEL: fptoui_v4i64_v4f32: +; 
CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-NEXT: ret + %val = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 + ret <4 x i64> %val +} + +define <4 x float> @sitofp_v4f32_v4i32(<4 x i32> %x) #0 { +; CHECK-LABEL: sitofp_v4f32_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @uitofp_v4f32_v4i32(<4 x i32> %x) #0 { +; CHECK-LABEL: uitofp_v4f32_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @sitofp_v4f32_v4i64(<4 x i64> %x) #0 { +; CHECK-LABEL: sitofp_v4f32_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.2d, v0.2d +; CHECK-NEXT: scvtf v1.2d, v1.2d +; CHECK-NEXT: fcvtn v0.2s, v0.2d +; CHECK-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @uitofp_v4f32_v4i64(<4 x i64> %x) #0 { +; CHECK-LABEL: uitofp_v4f32_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf v0.2d, v0.2d +; CHECK-NEXT: ucvtf v1.2d, v1.2d +; CHECK-NEXT: fcvtn v0.2s, v0.2d +; CHECK-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @sqrt_v4f32(<4 x 
float> %x) #0 { +; CHECK-LABEL: sqrt_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fsqrt v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @rint_v4f32(<4 x float> %x) #0 { +; CHECK-LABEL: rint_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @nearbyint_v4f32(<4 x float> %x) #0 { +; CHECK-LABEL: nearbyint_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frinti v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @maxnum_v4f32(<4 x float> %x, <4 x float> %y) #0 { +; CHECK-LABEL: maxnum_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float> %x, <4 x float> %y, metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @minnum_v4f32(<4 x float> %x, <4 x float> %y) #0 { +; CHECK-LABEL: minnum_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float> %x, <4 x float> %y, metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @ceil_v4f32(<4 x float> %x) #0 { +; CHECK-LABEL: ceil_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @floor_v4f32(<4 x float> %x) #0 
{ +; CHECK-LABEL: floor_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @round_v4f32(<4 x float> %x) #0 { +; CHECK-LABEL: round_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 { +; CHECK-LABEL: roundeven_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x float> @trunc_v4f32(<4 x float> %x) #0 { +; CHECK-LABEL: trunc_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.4s, v0.4s +; CHECK-NEXT: ret + %val = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 + ret <4 x float> %val +} + +define <4 x i1> @fcmp_v4f32(<4 x float> %x, <4 x float> %y) #0 { +; CHECK-LABEL: fcmp_v4f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: mov s3, v0.s[1] +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: mov s4, v1.s[2] +; CHECK-NEXT: mov s5, v0.s[2] +; CHECK-NEXT: mov s1, v1.s[3] +; CHECK-NEXT: mov s0, v0.s[3] +; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: fcmp s3, s2 +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: fcmp s5, s4 +; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: mov v2.s[2], w8 +; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: mov v2.s[3], w8 +; CHECK-NEXT: xtn v0.4h, v2.4s +; CHECK-NEXT: ret +entry: + %val = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata 
!"fpexcept.strict") + ret <4 x i1> %val +} + +define <4 x i1> @fcmps_v4f32(<4 x float> %x, <4 x float> %y) #0 { +; CHECK-LABEL: fcmps_v4f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: mov s3, v0.s[1] +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: mov s4, v1.s[2] +; CHECK-NEXT: mov s5, v0.s[2] +; CHECK-NEXT: mov s1, v1.s[3] +; CHECK-NEXT: mov s0, v0.s[3] +; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: fcmpe s3, s2 +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: fcmpe s5, s4 +; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: mov v2.s[2], w8 +; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: mov v2.s[3], w8 +; CHECK-NEXT: xtn v0.4h, v2.4s +; CHECK-NEXT: ret +entry: + %val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict") + ret <4 x i1> %val +} + + +; Double-precision intrinsics + +define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) #0 { +; CHECK-LABEL: add_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) #0 { +; CHECK-LABEL: sub_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) #0 { +; CHECK-LABEL: mul_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %x, <2 x double> %y, metadata 
!"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) #0 { +; CHECK-LABEL: div_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @fma_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 { +; CHECK-LABEL: fma_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x i32> @fptosi_v2i32_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: fptosi_v2i32_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret + %val = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0 + ret <2 x i32> %val +} + +define <2 x i32> @fptoui_v2i32_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: fptoui_v2i32_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret + %val = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0 + ret <2 x i32> %val +} + +define <2 x i64> @fptosi_v2i64_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: fptosi_v2i64_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0 + ret <2 x i64> %val +} + +define <2 x i64> @fptoui_v2i64_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: 
fptoui_v2i64_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0 + ret <2 x i64> %val +} + +define <2 x double> @sitofp_v2f64_v2i32(<2 x i32> %x) #0 { +; CHECK-LABEL: sitofp_v2f64_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: scvtf v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @uitofp_v2f64_v2i32(<2 x i32> %x) #0 { +; CHECK-LABEL: uitofp_v2f64_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: ucvtf v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @sitofp_v2f64_v2i64(<2 x i64> %x) #0 { +; CHECK-LABEL: sitofp_v2f64_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @uitofp_v2f64_v2i64(<2 x i64> %x) #0 { +; CHECK-LABEL: uitofp_v2f64_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @sqrt_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: sqrt_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fsqrt v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata 
!"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @rint_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: rint_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @nearbyint_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: nearbyint_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frinti v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @maxnum_v2f64(<2 x double> %x, <2 x double> %y) #0 { +; CHECK-LABEL: maxnum_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double> %x, <2 x double> %y, metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @minnum_v2f64(<2 x double> %x, <2 x double> %y) #0 { +; CHECK-LABEL: minnum_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double> %x, <2 x double> %y, metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @ceil_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: ceil_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @floor_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: floor_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %x, metadata 
!"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @round_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: round_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @roundeven_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: roundeven_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x double> @trunc_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: trunc_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.2d, v0.2d +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + +define <2 x i1> @fcmp_v2f64(<2 x double> %x, <2 x double> %y) #0 { +; CHECK-LABEL: fcmp_v2f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov d2, v1.d[1] +; CHECK-NEXT: mov d3, v0.d[1] +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: csetm x8, eq +; CHECK-NEXT: fcmp d3, d2 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: csetm x8, eq +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %val = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %x, <2 x double> %y, metadata !"oeq", metadata !"fpexcept.strict") + ret <2 x i1> %val +} + +define <2 x i1> @fcmps_v2f64(<2 x double> %x, <2 x double> %y) #0 { +; CHECK-LABEL: fcmps_v2f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov d2, v1.d[1] +; CHECK-NEXT: mov d3, v0.d[1] +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: csetm x8, eq +; CHECK-NEXT: fcmpe d3, d2 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: csetm x8, eq +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %val = 
call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %x, <2 x double> %y, metadata !"oeq", metadata !"fpexcept.strict") + ret <2 x i1> %val +} + + +; Double-precision single element intrinsics + +define <1 x double> @add_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: add_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.fadd.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @sub_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: sub_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub d0, d0, d1 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.fsub.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @mul_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: mul_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul d0, d0, d1 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @div_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: div_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.fdiv.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @fma_v1f64(<1 x double> %x, <1 x double> %y, <1 x double> %z) #0 { +; CHECK-LABEL: fma_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmadd d0, d0, d1, d2 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> %x, <1 x double> %y, <1 x double> %z, metadata !"round.tonearest", 
metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x i32> @fptosi_v1i32_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: fptosi_v1i32_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ret + %val = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0 + ret <1 x i32> %val +} + +define <1 x i32> @fptoui_v1i32_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: fptoui_v1i32_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ret + %val = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0 + ret <1 x i32> %val +} + +define <1 x i64> @fptosi_v1i64_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: fptosi_v1i64_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %val = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0 + ret <1 x i64> %val +} + +define <1 x i64> @fptoui_v1i64_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: fptoui_v1i64_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x8, d0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %val = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0 + ret <1 x i64> %val +} + +define <1 x double> @sitofp_v1f64_v1i32(<1 x i32> %x) #0 { +; CHECK-LABEL: sitofp_v1f64_v1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: scvtf d0, w8 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @uitofp_v1f64_v1i32(<1 x i32> %x) #0 { +; CHECK-LABEL: uitofp_v1f64_v1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed 
$d0 def $q0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: ucvtf d0, w8 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @sitofp_v1f64_v1i64(<1 x i64> %x) #0 { +; CHECK-LABEL: sitofp_v1f64_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: scvtf d0, x8 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @uitofp_v1f64_v1i64(<1 x i64> %x) #0 { +; CHECK-LABEL: uitofp_v1f64_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: ucvtf d0, x8 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @sqrt_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: sqrt_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fsqrt d0, d0 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.sqrt.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @rint_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: rint_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.rint.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @nearbyint_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: nearbyint_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frinti d0, d0 +; CHECK-NEXT: ret + %val = call <1 x double> 
@llvm.experimental.constrained.nearbyint.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @maxnum_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: maxnum_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm d0, d0, d1 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.maxnum.v1f64(<1 x double> %x, <1 x double> %y, metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @minnum_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: minnum_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm d0, d0, d1 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.minnum.v1f64(<1 x double> %x, <1 x double> %y, metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @ceil_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: ceil_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp d0, d0 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @floor_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: floor_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm d0, d0 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @round_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: round_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta d0, d0 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @roundeven_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: roundeven_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double> %x, metadata 
!"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x double> @trunc_v1f64(<1 x double> %x) #0 { +; CHECK-LABEL: trunc_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz d0, d0 +; CHECK-NEXT: ret + %val = call <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + +define <1 x i1> @fcmp_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: fcmp_v1f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +entry: + %val = call <1 x i1> @llvm.experimental.constrained.fcmp.v1f64(<1 x double> %x, <1 x double> %y, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret <1 x i1> %val +} + +define <1 x i1> @fcmps_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: fcmps_v1f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +entry: + %val = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %x, <1 x double> %y, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret <1 x i1> %val +} + + +; Intrinsics to convert between floating-point types + +define <2 x float> @fptrunc_v2f32_v2f64(<2 x double> %x) #0 { +; CHECK-LABEL: fptrunc_v2f32_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtn v0.2s, v0.2d +; CHECK-NEXT: ret + %val = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <2 x float> %val +} + +define <2 x double> @fpext_v2f64_v2f32(<2 x float> %x) #0 { +; CHECK-LABEL: fpext_v2f64_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: ret + %val = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %x, metadata !"fpexcept.strict") #0 + ret <2 x double> %val +} + + +attributes #0 = { strictfp } + +declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare
<4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata) +declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata) +declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata) +declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata) +declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata) +declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float>, <4 x float>, metadata) +declare <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float>, <4 x float>, metadata) +declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata) +declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata) +declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata) +declare <4 x float> 
@llvm.experimental.constrained.roundeven.v4f32(<4 x float>, metadata) +declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata) +declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x float>, <4 x float>, metadata, metadata) +declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float>, <4 x float>, metadata, metadata) + +declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata) +declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata) +declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata) +declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata) +declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata) +declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x 
double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata) +declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata) +declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata) +declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata) +declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata) +declare <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double>, metadata) +declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata) +declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +declare <1 x double> @llvm.experimental.constrained.fadd.v1f64(<1 x double>, <1 x double>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.fsub.v1f64(<1 x double>, <1 x double>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double>, <1 x double>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.fdiv.v1f64(<1 x double>, <1 x double>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double>, <1 x double>, <1 x double>, metadata, metadata) +declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double>, metadata) +declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double>, metadata) +declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double>, metadata) +declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double>, metadata) +declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32>, metadata, metadata) +declare <1 x double> 
@llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.sqrt.v1f64(<1 x double>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.rint.v1f64(<1 x double>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.nearbyint.v1f64(<1 x double>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.maxnum.v1f64(<1 x double>, <1 x double>, metadata) +declare <1 x double> @llvm.experimental.constrained.minnum.v1f64(<1 x double>, <1 x double>, metadata) +declare <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double>, metadata) +declare <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double>, metadata) +declare <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double>, metadata) +declare <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double>, metadata) +declare <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double>, metadata) +declare <1 x i1> @llvm.experimental.constrained.fcmp.v1f64(<1 x double>, <1 x double>, metadata, metadata) +declare <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double>, <1 x double>, metadata, metadata) + +declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)