Index: clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
===================================================================
--- clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
+++ clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
@@ -1,24 +1,21 @@
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
 // RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone \
 // RUN: -emit-llvm -o - %s | opt -S -mem2reg \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
+// RUN: | FileCheck --check-prefixes=COMMON,COMMONIR,UNCONSTRAINED %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
 // RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone \
-// RUN: -ffp-exception-behavior=strict -emit-llvm -o - %s | opt -S -mem2reg \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
+// RUN: -ffp-exception-behavior=strict -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -mem2reg \
+// RUN: | FileCheck --check-prefixes=COMMON,COMMONIR,CONSTRAINED %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
 // RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -o - %s \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+// RUN: | FileCheck --check-prefixes=COMMON,CHECK-ASM,CHECK-ASM-UNCONSTRAINED %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
-// RUN: -ffp-exception-behavior=strict \
+// RUN: -ffp-exception-behavior=strict -fexperimental-strict-floating-point \
 // RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -o - %s \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+// RUN: | FileCheck --check-prefixes=COMMON,CHECK-ASM,CHECK-ASM-CONSTRAINED %s
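+// The constrained RUN lines use separate CHECK-ASM-UNCONSTRAINED and
+// CHECK-ASM-CONSTRAINED prefixes because the compare intrinsics become the
+// signaling fcmps intrinsic in strict mode, which selects fcmpe rather than
+// fcmp.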
 
 // REQUIRES: aarch64-registered-target
 
-// "Lowering of strict fp16 not yet implemented"
-// XFAIL: *
-
 #include <arm_fp16.h>
 
 // COMMON-LABEL: test_vceqzh_f16
@@ -34,8 +31,9 @@
 // COMMON-LABEL: test_vcgezh_f16
 // UNCONSTRAINED: [[TMP1:%.*]] = fcmp oge half %a, 0xH0000
-// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half 0xH0000, metadata !"oge", metadata !"fpexcept.strict")
-// CHECK-ASM: fcmp
+// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half 0xH0000, metadata !"oge", metadata !"fpexcept.strict")
+// CHECK-ASM-UNCONSTRAINED: fcmp
+// CHECK-ASM-CONSTRAINED: fcmpe
 // CHECK-ASM: cset {{w[0-9]+}}, ge
 // COMMONIR: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
 // COMMONIR: ret i16 [[TMP2]]
@@ -45,8 +43,9 @@
 // COMMON-LABEL: test_vcgtzh_f16
 // UNCONSTRAINED: [[TMP1:%.*]] = fcmp ogt half %a, 0xH0000
-// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half 0xH0000, metadata !"ogt", metadata !"fpexcept.strict")
-// CHECK-ASM: fcmp
+// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half 0xH0000, metadata !"ogt", metadata !"fpexcept.strict")
+// CHECK-ASM-UNCONSTRAINED: fcmp
+// CHECK-ASM-CONSTRAINED: fcmpe
 // CHECK-ASM: cset {{w[0-9]+}}, gt
 // COMMONIR: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
 // COMMONIR: ret i16 [[TMP2]]
@@ -56,8 +55,9 @@
 // COMMON-LABEL: test_vclezh_f16
 // UNCONSTRAINED: [[TMP1:%.*]] = fcmp ole half %a, 0xH0000
-// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half 0xH0000, metadata !"ole", metadata !"fpexcept.strict")
-// CHECK-ASM: fcmp
+// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half 0xH0000, metadata !"ole", metadata !"fpexcept.strict")
+// CHECK-ASM-UNCONSTRAINED: fcmp
+// CHECK-ASM-CONSTRAINED: fcmpe
 // CHECK-ASM: cset {{w[0-9]+}}, ls
 // COMMONIR: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
 // COMMONIR: ret i16 [[TMP2]]
@@ -67,8 +67,9 @@
 // COMMON-LABEL: test_vcltzh_f16
 // UNCONSTRAINED: [[TMP1:%.*]] = fcmp olt half %a, 0xH0000
-// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half 0xH0000, metadata !"olt", metadata !"fpexcept.strict")
-// CHECK-ASM: fcmp
+// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half 0xH0000, metadata !"olt", metadata !"fpexcept.strict")
+// CHECK-ASM-UNCONSTRAINED: fcmp
+// CHECK-ASM-CONSTRAINED: fcmpe
 // CHECK-ASM: cset {{w[0-9]+}}, mi
 // COMMONIR: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
 // COMMONIR: ret i16 [[TMP2]]
@@ -131,60 +132,50 @@
 }
 
 // COMMON-LABEL: test_vcvth_s16_f16
-// UNCONSTRAINED: [[VCVT:%.*]] = fptosi half %a to i16
-// CONSTRAINED: [[VCVT:%.*]] = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %a, metadata !"fpexcept.strict")
-// CHECK-ASM: fcvt [[CVTREG:s[0-9]+]], {{h[0-9]+}}
-// CHECK-ASM: fcvtzs {{w[0-9]+}}, [[CVTREG]]
-// COMMONIR: ret i16 [[VCVT]]
+// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+// COMMONIR: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
+// CHECK-ASM: fcvtzs {{w[0-9]+}}, {{h[0-9]+}}
+// COMMONIR: ret i16 [[TRUNC]]
 int16_t test_vcvth_s16_f16 (float16_t a) {
   return vcvth_s16_f16(a);
 }
 
 // COMMON-LABEL: test_vcvth_s32_f16
-// UNCONSTRAINED: [[VCVT:%.*]] = fptosi half %a to i32
-// CONSTRAINED: [[VCVT:%.*]] = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict")
-// CHECK-ASM: fcvt [[CVTREG:s[0-9]+]], {{h[0-9]+}}
-// CHECK-ASM: fcvtzs {{w[0-9]+}}, [[CVTREG]]
+// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+// CHECK-ASM: fcvtzs {{w[0-9]+}}, {{h[0-9]+}}
 // COMMONIR: ret i32 [[VCVT]]
 int32_t test_vcvth_s32_f16 (float16_t a) {
   return vcvth_s32_f16(a);
 }
 
 // COMMON-LABEL: test_vcvth_s64_f16
-// UNCONSTRAINED: [[VCVT:%.*]] = fptosi half %a to i64
-// CONSTRAINED: [[VCVT:%.*]] = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %a, metadata !"fpexcept.strict")
-// CHECK-ASM: fcvt [[CVTREG:s[0-9]+]], {{h[0-9]+}}
-// CHECK-ASM: fcvtzs {{x[0-9]+}}, [[CVTREG]]
+// COMMONIR: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+// CHECK-ASM: fcvtzs {{x[0-9]+}}, {{h[0-9]+}}
 // COMMONIR: ret i64 [[VCVT]]
 int64_t test_vcvth_s64_f16 (float16_t a) {
   return vcvth_s64_f16(a);
 }
 
 // COMMON-LABEL: test_vcvth_u16_f16
-// UNCONSTRAINED: [[VCVT:%.*]] = fptoui half %a to i16
-// CONSTRAINED: [[VCVT:%.*]] = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %a, metadata !"fpexcept.strict")
-// CHECK-ASM: fcvt [[CVTREG:s[0-9]+]], {{h[0-9]+}}
-// CHECK-ASM: fcvtzu {{w[0-9]+}}, [[CVTREG]]
-// COMMONIR: ret i16 [[VCVT]]
+// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+// COMMONIR: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
+// CHECK-ASM: fcvtzu {{w[0-9]+}}, {{h[0-9]+}}
+// COMMONIR: ret i16 [[TRUNC]]
 uint16_t test_vcvth_u16_f16 (float16_t a) {
   return vcvth_u16_f16(a);
 }
 
 // COMMON-LABEL: test_vcvth_u32_f16
-// UNCONSTRAINED: [[VCVT:%.*]] = fptoui half %a to i32
-// CONSTRAINED: [[VCVT:%.*]] = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict")
-// CHECK-ASM: fcvt [[CVTREG:s[0-9]+]], {{h[0-9]+}}
-// CHECK-ASM: fcvtzu {{w[0-9]+}}, [[CVTREG]]
+// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+// CHECK-ASM: fcvtzu {{w[0-9]+}}, {{h[0-9]+}}
 // COMMONIR: ret i32 [[VCVT]]
 uint32_t test_vcvth_u32_f16 (float16_t a) {
   return vcvth_u32_f16(a);
 }
 
 // COMMON-LABEL: test_vcvth_u64_f16
-// UNCONSTRAINED: [[VCVT:%.*]] = fptoui half %a to i64
-// CONSTRAINED: [[VCVT:%.*]] = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %a, metadata !"fpexcept.strict")
-// CHECK-ASM: fcvt [[CVTREG:s[0-9]+]], {{h[0-9]+}}
-// CHECK-ASM: fcvtzu {{x[0-9]+}}, [[CVTREG]]
+// COMMONIR: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+// CHECK-ASM: fcvtzu {{x[0-9]+}}, {{h[0-9]+}}
 // COMMONIR: ret i64 [[VCVT]]
 uint64_t test_vcvth_u64_f16 (float16_t a) {
   return vcvth_u64_f16(a);
@@ -275,8 +266,9 @@
 // COMMON-LABEL: test_vcgeh_f16
 // UNCONSTRAINED: [[TMP1:%.*]] = fcmp oge half %a, %b
-// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict")
-// CHECK-ASM: fcmp
+// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict")
+// CHECK-ASM-UNCONSTRAINED: fcmp
+// CHECK-ASM-CONSTRAINED: fcmpe
 // CHECK-ASM: cset {{w[0-9]+}}, ge
 // COMMONIR: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
 // COMMONIR: ret i16 [[TMP2]]
@@ -286,8 +278,9 @@
 // COMMON-LABEL: test_vcgth_f16
 // UNCONSTRAINED: [[TMP1:%.*]] = fcmp ogt half %a, %b
-// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict")
-// CHECK-ASM: fcmp
+// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict")
+// CHECK-ASM-UNCONSTRAINED: fcmp
+// CHECK-ASM-CONSTRAINED: fcmpe
 // CHECK-ASM: cset {{w[0-9]+}}, gt
 // COMMONIR: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
 // COMMONIR: ret i16 [[TMP2]]
@@ -297,8 +290,9 @@
 // COMMON-LABEL: test_vcleh_f16
 // UNCONSTRAINED: [[TMP1:%.*]] = fcmp ole half %a, %b
-// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict")
-// CHECK-ASM: fcmp
+// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict")
+// CHECK-ASM-UNCONSTRAINED: fcmp
+// CHECK-ASM-CONSTRAINED: fcmpe
 // CHECK-ASM: cset {{w[0-9]+}}, ls
 // COMMONIR: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
 // COMMONIR: ret i16 [[TMP2]]
@@ -308,8 +302,9 @@
 // COMMON-LABEL: test_vclth_f16
 // UNCONSTRAINED: [[TMP1:%.*]] = fcmp olt half %a, %b
-// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict")
-// CHECK-ASM: fcmp
+// CONSTRAINED: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict")
+// CHECK-ASM-UNCONSTRAINED: fcmp
+// CHECK-ASM-CONSTRAINED: fcmpe
 // CHECK-ASM: cset {{w[0-9]+}}, mi
 // COMMONIR: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
 // COMMONIR: ret i16 [[TMP2]]
@@ -358,7 +353,8 @@
 // CONSTRAINED: [[SUB:%.*]] = call half @llvm.experimental.constrained.fsub.f16(half 0xH8000, half %b, metadata !"round.tonearest", metadata !"fpexcept.strict")
 // UNCONSTRAINED: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
 // CONSTRAINED: [[ADD:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half %c, half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmsub
+// CHECK-ASM: fsub
+// CHECK-ASM: fmadd
 // COMMONIR: ret half [[ADD]]
 float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
   return vfmsh_f16(a, b, c);
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4700,6 +4700,12 @@
     Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3,
                                   DAG.getIntPtrConstant(0, dl)));
     break;
+  case ISD::STRICT_FADD:
+  case ISD::STRICT_FSUB:
+  case ISD::STRICT_FMUL:
+  case ISD::STRICT_FDIV:
+  case ISD::STRICT_FMINNUM:
+  case ISD::STRICT_FMAXNUM:
   case ISD::STRICT_FREM:
   case ISD::STRICT_FPOW:
     Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
@@ -4724,6 +4730,22 @@
                     DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
                     DAG.getIntPtrConstant(0, dl)));
     break;
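+  // STRICT_FMA is promoted by hand: each operand is extended on its own
+  // chain, and the chains are merged with a TokenFactor before the promoted
+  // operation is created.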
+  case ISD::STRICT_FMA:
+    Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+                       {Node->getOperand(0), Node->getOperand(1)});
+    Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+                       {Node->getOperand(0), Node->getOperand(2)});
+    Tmp3 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+                       {Node->getOperand(0), Node->getOperand(3)});
+    Tmp4 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1),
+                       Tmp2.getValue(1), Tmp3.getValue(1));
+    Tmp4 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+                       {Tmp4, Tmp1, Tmp2, Tmp3});
+    Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+                       {Tmp4.getValue(1), Tmp4, DAG.getIntPtrConstant(0, dl)});
+    Results.push_back(Tmp4);
+    Results.push_back(Tmp4.getValue(1));
+    break;
   case ISD::FCOPYSIGN:
   case ISD::FPOWI: {
     Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
@@ -4740,6 +4762,16 @@
                                   Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
     break;
   }
+  case ISD::STRICT_FPOWI:
+    Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+                       {Node->getOperand(0), Node->getOperand(1)});
+    Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+                       {Tmp1.getValue(1), Tmp1, Node->getOperand(2)});
+    Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+                       {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)});
+    Results.push_back(Tmp3);
+    Results.push_back(Tmp3.getValue(1));
+    break;
   case ISD::FFLOOR:
   case ISD::FCEIL:
   case ISD::FRINT:
@@ -4764,12 +4796,19 @@
     break;
   case ISD::STRICT_FFLOOR:
   case ISD::STRICT_FCEIL:
+  case ISD::STRICT_FRINT:
+  case ISD::STRICT_FNEARBYINT:
   case ISD::STRICT_FROUND:
+  case ISD::STRICT_FROUNDEVEN:
+  case ISD::STRICT_FTRUNC:
+  case ISD::STRICT_FSQRT:
   case ISD::STRICT_FSIN:
   case ISD::STRICT_FCOS:
   case ISD::STRICT_FLOG:
+  case ISD::STRICT_FLOG2:
   case ISD::STRICT_FLOG10:
   case ISD::STRICT_FEXP:
+  case ISD::STRICT_FEXP2:
     Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
                        {Node->getOperand(0), Node->getOperand(1)});
     Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -580,64 +580,37 @@
   else
     setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
 
-  setOperationAction(ISD::FREM, MVT::f16, Promote);
-  setOperationAction(ISD::FREM, MVT::v4f16, Expand);
-  setOperationAction(ISD::FREM, MVT::v8f16, Expand);
-  setOperationAction(ISD::FPOW, MVT::f16, Promote);
-  setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
-  setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
-  setOperationAction(ISD::FPOWI, MVT::f16, Promote);
-  setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
-  setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
-  setOperationAction(ISD::FCOS, MVT::f16, Promote);
-  setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
-  setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
-  setOperationAction(ISD::FSIN, MVT::f16, Promote);
-  setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
-  setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
-  setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
-  setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
-  setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
-  setOperationAction(ISD::FEXP, MVT::f16, Promote);
-  setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
-  setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
-  setOperationAction(ISD::FEXP2, MVT::f16, Promote);
-  setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
-  setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
-  setOperationAction(ISD::FLOG, MVT::f16, Promote);
-  setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
-  setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
-  setOperationAction(ISD::FLOG2, MVT::f16, Promote);
-  setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
-  setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
-  setOperationAction(ISD::FLOG10, MVT::f16, Promote);
-  setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
-  setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
+  for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
+                  ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
+                  ISD::FEXP, ISD::FEXP2, ISD::FLOG,
+                  ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
+                  ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
+                  ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
+                  ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
+    setOperationAction(Op, MVT::f16, Promote);
+    setOperationAction(Op, MVT::v4f16, Expand);
+    setOperationAction(Op, MVT::v8f16, Expand);
+  }
 
   if (!Subtarget->hasFullFP16()) {
-    setOperationAction(ISD::SELECT, MVT::f16, Promote);
-    setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
-    setOperationAction(ISD::SETCC, MVT::f16, Promote);
-    setOperationAction(ISD::BR_CC, MVT::f16, Promote);
-    setOperationAction(ISD::FADD, MVT::f16, Promote);
-    setOperationAction(ISD::FSUB, MVT::f16, Promote);
-    setOperationAction(ISD::FMUL, MVT::f16, Promote);
-    setOperationAction(ISD::FDIV, MVT::f16, Promote);
-    setOperationAction(ISD::FMA, MVT::f16, Promote);
-    setOperationAction(ISD::FNEG, MVT::f16, Promote);
-    setOperationAction(ISD::FABS, MVT::f16, Promote);
-    setOperationAction(ISD::FCEIL, MVT::f16, Promote);
-    setOperationAction(ISD::FSQRT, MVT::f16, Promote);
-    setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
-    setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
-    setOperationAction(ISD::FRINT, MVT::f16, Promote);
-    setOperationAction(ISD::FROUND, MVT::f16, Promote);
-    setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
-    setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
-    setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
-    setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
-    setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
-    setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
+    for (auto Op :
+         {ISD::SELECT, ISD::SELECT_CC, ISD::SETCC, ISD::BR_CC, ISD::FADD,
+          ISD::FSUB, ISD::FMUL, ISD::FDIV, ISD::FMA, ISD::FNEG, ISD::FABS,
+          ISD::FCEIL, ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
+          ISD::FROUND, ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
+          ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
+          ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA,
+          ISD::STRICT_FCEIL, ISD::STRICT_FSQRT, ISD::STRICT_FFLOOR,
+          ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT, ISD::STRICT_FROUND,
+          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FTRUNC, ISD::STRICT_FMINNUM,
+          ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM})
+      setOperationAction(Op, MVT::f16, Promote);
+
+    // Round-to-integer operations need custom lowering for fp16, as Promote
+    // doesn't work because the result type is integer.
+    for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
+                    ISD::STRICT_LLRINT})
+      setOperationAction(Op, MVT::f16, Custom);
 
     // promote v4f16 to v4f32 when that is known to be safe.
     setOperationAction(ISD::FADD, MVT::v4f16, Promote);
@@ -1389,6 +1362,8 @@
   }
 
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
+
+  IsStrictFPEnabled = true;
 }
 
 void AArch64TargetLowering::addTypeForNEON(MVT VT) {
@@ -2567,7 +2542,18 @@
                               bool IsSignaling) {
   EVT VT = LHS.getValueType();
   assert(VT != MVT::f128);
-  assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
+
+  const bool FullFP16 =
+      static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+
+  if (VT == MVT::f16 && !FullFP16) {
+    LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
+                      {Chain, LHS});
+    RHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
+                      {LHS.getValue(1), RHS});
+    Chain = RHS.getValue(1);
+    VT = MVT::f32;
+  }
   unsigned Opcode =
       IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
   return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
@@ -3444,8 +3430,7 @@
       MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
                        VT.getVectorNumElements());
   if (IsStrict) {
-    SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
-                              {ExtVT, MVT::Other},
+    SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {ExtVT, MVT::Other},
                               {Op.getOperand(0), Op.getOperand(1)});
     return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
                        {Ext.getValue(1), Ext.getValue(0)});
@@ -3484,8 +3469,14 @@
   // f16 conversions are promoted to f32 when full fp16 is not supported.
   if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
-    assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
     SDLoc dl(Op);
+    if (IsStrict) {
+      SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
+                                {MVT::f32, MVT::Other},
+                                {Op.getOperand(0), SrcVal});
+      return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
+                         {Ext.getValue(1), Ext.getValue(0)});
+    }
     return DAG.getNode(
         Op.getOpcode(), dl, Op.getValueType(),
         DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
@@ -3714,8 +3705,14 @@
   // f16 conversions are promoted to f32 when full fp16 is not supported.
   if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
-    assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
     SDLoc dl(Op);
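+    // In the strict case the conversion is performed at f32 and the result
+    // is narrowed with STRICT_FP_ROUND, so that the exception-status chain
+    // is threaded through both nodes.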
+    if (IsStrict) {
+      SDValue Val = DAG.getNode(Op.getOpcode(), dl, {MVT::f32, MVT::Other},
+                                {Op.getOperand(0), SrcVal});
+      return DAG.getNode(
+          ISD::STRICT_FP_ROUND, dl, {MVT::f16, MVT::Other},
+          {Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)});
+    }
     return DAG.getNode(
         ISD::FP_ROUND, dl, MVT::f16,
         DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
@@ -5259,6 +5256,18 @@
     return LowerCTTZ(Op, DAG);
   case ISD::VECTOR_SPLICE:
     return LowerVECTOR_SPLICE(Op, DAG);
+  case ISD::STRICT_LROUND:
+  case ISD::STRICT_LLROUND:
+  case ISD::STRICT_LRINT:
+  case ISD::STRICT_LLRINT: {
+    assert(Op.getOperand(1).getValueType() == MVT::f16 &&
+           "Expected custom lowering of rounding operations only for f16");
+    SDLoc DL(Op);
+    SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
+                              {Op.getOperand(0), Op.getOperand(1)});
+    return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
+                       {Ext.getValue(1), Ext.getValue(0)});
+  }
   }
 }
Index: llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
@@ -0,0 +1,1173 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+; Check that constrained fp intrinsics are correctly lowered.
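+; In the CHECK-NOFP16 configuration the f16 operations are promoted: the
+; operands are extended to f32 with fcvt, the operation is performed in
+; single precision, and the result is narrowed back with fcvt. With
+; +fullfp16 the native f16 instructions are selected instead.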
+
+
+; Half-precision intrinsics
+
+define half @add_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: add_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: add_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fadd h0, h0, h1
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @sub_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: sub_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fsub s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: sub_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fsub h0, h0, h1
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @mul_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: mul_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fmul s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: mul_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fmul h0, h0, h1
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @div_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: div_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fdiv s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: div_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fdiv h0, h0, h1
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.fdiv.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @frem_f16(half %x, half %y) #0 {
+; CHECK-LABEL: frem_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl fmodf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.frem.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @fma_f16(half %x, half %y, half %z) #0 {
+; CHECK-NOFP16-LABEL: fma_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s2, h2
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fmadd s0, s0, s1, s2
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fma_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define i32 @fptosi_i32_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: fptosi_i32_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtzs w0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fptosi_i32_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzs w0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0
+  ret i32 %val
+}
+
+define i32 @fptoui_i32_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: fptoui_i32_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtzu w0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fptoui_i32_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzu w0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
+  ret i32 %val
+}
+
+define i64 @fptosi_i64_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: fptosi_i64_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtzs x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fptosi_i64_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzs x0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict") #0
+  ret i64 %val
+}
+
+define i64 @fptoui_i64_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: fptoui_i64_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtzu x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fptoui_i64_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzu x0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict") #0
+  ret i64 %val
+}
+
+define half @sitofp_f16_i32(i32 %x) #0 {
+; CHECK-NOFP16-LABEL: sitofp_f16_i32:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: scvtf s0, w0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: sitofp_f16_i32:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: scvtf h0, w0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @uitofp_f16_i32(i32 %x) #0 {
+; CHECK-NOFP16-LABEL: uitofp_f16_i32:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: ucvtf s0, w0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: uitofp_f16_i32:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: ucvtf h0, w0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @sitofp_f16_i64(i64 %x) #0 {
+; CHECK-NOFP16-LABEL: sitofp_f16_i64:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: scvtf s0, x0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: sitofp_f16_i64:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: scvtf h0, x0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @uitofp_f16_i64(i64 %x) #0 {
+; CHECK-NOFP16-LABEL: uitofp_f16_i64:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: ucvtf s0, x0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: uitofp_f16_i64:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: ucvtf h0, x0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @sitofp_f16_i128(i128 %x) #0 {
+; CHECK-NOFP16-LABEL: sitofp_f16_i128:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NOFP16-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NOFP16-NEXT: .cfi_offset w30, -16
+; CHECK-NOFP16-NEXT: bl __floattisf
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: sitofp_f16_i128:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-FP16-NEXT: .cfi_def_cfa_offset 16
+; CHECK-FP16-NEXT: .cfi_offset w30, -16
+; CHECK-FP16-NEXT: bl __floattihf
+; CHECK-FP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.sitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @uitofp_f16_i128(i128 %x) #0 {
+; CHECK-NOFP16-LABEL: uitofp_f16_i128:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NOFP16-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NOFP16-NEXT: .cfi_offset w30, -16
+; CHECK-NOFP16-NEXT: bl __floatuntisf
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: uitofp_f16_i128:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-FP16-NEXT: .cfi_def_cfa_offset 16
+; CHECK-FP16-NEXT: .cfi_offset w30, -16
+; CHECK-FP16-NEXT: bl __floatuntihf
+; CHECK-FP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.uitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @sqrt_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: sqrt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fsqrt s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: sqrt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fsqrt h0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.sqrt.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @powi_f16(half %x, i32 %y) #0 {
+; CHECK-LABEL: powi_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl __powisf2
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.powi.f16(half %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @sin_f16(half %x) #0 {
+; CHECK-LABEL: sin_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sinf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.sin.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @cos_f16(half %x) #0 {
+; CHECK-LABEL: cos_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl cosf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.cos.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @pow_f16(half %x, half %y) #0 {
+; CHECK-LABEL: pow_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl powf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.pow.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @log_f16(half %x) #0 {
+; CHECK-LABEL: log_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl logf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.log.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @log10_f16(half %x) #0 {
+; CHECK-LABEL: log10_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl log10f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.log10.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @log2_f16(half %x) #0 {
+; CHECK-LABEL: log2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl log2f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.log2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @exp_f16(half %x) #0 {
+; CHECK-LABEL: exp_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl expf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.exp.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @exp2_f16(half %x) #0 {
+; CHECK-LABEL: exp2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl exp2f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.exp2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @rint_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: rint_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: rint_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.rint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @nearbyint_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: nearbyint_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frinti s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: nearbyint_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frinti h0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.nearbyint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define i32 @lrint_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: lrint_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvtzs w0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: lrint_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: fcvtzs w0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call i32 @llvm.experimental.constrained.lrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret i32 %val
+}
+
+define i64 @llrint_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: llrint_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvtzs x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: llrint_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: fcvtzs x0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call i64 @llvm.experimental.constrained.llrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret i64 %val
+}
+
+define half @maxnum_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: maxnum_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: maxnum_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fmaxnm h0, h0, h1
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.maxnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @minnum_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: minnum_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: minnum_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fminnm h0, h0, h1
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.minnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @ceil_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: ceil_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintp s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: ceil_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintp h0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.ceil.f16(half %x, metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @floor_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: floor_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintm s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: floor_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintm h0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.floor.f16(half %x, metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define i32 @lround_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: lround_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtas w0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: lround_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtas w0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call i32 @llvm.experimental.constrained.lround.f16(half %x, metadata !"fpexcept.strict") #0
+  ret i32 %val
+}
+
+define i64 @llround_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: llround_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtas x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: llround_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtas x0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call i64 @llvm.experimental.constrained.llround.f16(half %x, metadata !"fpexcept.strict") #0
+  ret i64 %val
+}
+
+define half @round_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: round_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frinta s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: round_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frinta h0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.round.f16(half %x, metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @roundeven_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: roundeven_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintn s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: roundeven_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintn h0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.roundeven.f16(half %x, metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define half @trunc_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: trunc_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintz s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: trunc_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintz h0, h0
+; CHECK-FP16-NEXT: ret
+  %val = call half @llvm.experimental.constrained.trunc.f16(half %x, metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
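+; The fcmp tests below use the quiet compare intrinsic, which selects fcmp;
+; the fcmps tests use the signaling variant, which selects fcmpe.
+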
+define i32 @fcmp_olt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_olt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, mi
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_olt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, mi
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_ole_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ole_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ls
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ole_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, ls
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_ogt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ogt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, gt
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ogt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, gt
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_oge_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_oge_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ge
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_oge_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, ge
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_oeq_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_oeq_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, eq
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_oeq_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, eq
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_one_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_one_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w8, mi
+; CHECK-NOFP16-NEXT: csinc w0, w8, wzr, le
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_one_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w8, mi
+; CHECK-FP16-NEXT: csinc w0, w8, wzr, le
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_ult_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ult_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, lt
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ult_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, lt
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_ule_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ule_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, le
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ule_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, le
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_ugt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ugt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, hi
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ugt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, hi
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_uge_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_uge_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, pl
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_uge_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, pl
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_ueq_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ueq_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w8, eq
+; CHECK-NOFP16-NEXT: csinc w0, w8, wzr, vc
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ueq_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w8, eq
+; CHECK-FP16-NEXT: csinc w0, w8, wzr, vc
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmp_une_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_une_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ne
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_une_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, ne
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_olt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_olt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, mi
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_olt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, mi
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_ole_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ole_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ls
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ole_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, ls
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_ogt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ogt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, gt
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ogt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, gt
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_oge_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_oge_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ge
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_oge_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, ge
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_oeq_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_oeq_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, eq
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_oeq_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, eq
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_one_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_one_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w8, mi
+; CHECK-NOFP16-NEXT: csinc w0, w8, wzr, le
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_one_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w8, mi
+; CHECK-FP16-NEXT: csinc w0, w8, wzr, le
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_ult_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ult_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, lt
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ult_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, lt
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_ule_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ule_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, le
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ule_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, le
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_ugt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ugt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, hi
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ugt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, hi
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_uge_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_uge_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, pl
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_uge_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, pl
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_ueq_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ueq_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w8, eq
+; CHECK-NOFP16-NEXT: csinc w0, w8, wzr, vc
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ueq_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w8, eq
+; CHECK-FP16-NEXT: csinc w0, w8, wzr, vc
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fcmps_une_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_une_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ne
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_une_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, ne
+; CHECK-FP16-NEXT: ret
+  %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+
+; Intrinsics to convert between floating-point types
+
+define half @fptrunc_f16_f32(float %x) #0 {
+; CHECK-LABEL: fptrunc_f16_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+  %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
+define float @fpext_f32_f16(half %x) #0 {
+; CHECK-LABEL: fpext_f32_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: ret
+  %val = call float @llvm.experimental.constrained.fpext.f32.f16(half %x, metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+
+attributes #0 = { strictfp }
+
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fdiv.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.frem.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i128(i128, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i128(i128, metadata, metadata)
+declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.powi.f16(half, i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sin.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.cos.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.pow.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.log.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.log10.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.log2.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.exp.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.exp2.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.f16(half, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.maxnum.f16(half, half, metadata)
+declare half @llvm.experimental.constrained.minnum.f16(half, half, metadata)
+declare half @llvm.experimental.constrained.ceil.f16(half, metadata)
+declare half @llvm.experimental.constrained.floor.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.lround.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.llround.f16(half, metadata)
+declare half @llvm.experimental.constrained.round.f16(half, metadata)
+declare half @llvm.experimental.constrained.roundeven.f16(half, metadata)
+declare half @llvm.experimental.constrained.trunc.f16(half, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f16(half, half, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata)
+
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
Index: llvm/test/CodeGen/AArch64/fp-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fp-intrinsics.ll
+++ llvm/test/CodeGen/AArch64/fp-intrinsics.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-eabi %s -disable-strictnode-mutation -o - | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s
 
 ; Check that constrained fp intrinsics are correctly lowered.
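+; The -disable-strictnode-mutation flag is no longer needed here now that
+; the target sets IsStrictFPEnabled.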