Index: clang/include/clang/Basic/arm_fp16.td
===================================================================
--- clang/include/clang/Basic/arm_fp16.td
+++ clang/include/clang/Basic/arm_fp16.td
@@ -14,16 +14,63 @@
 include "arm_neon_incl.td"
 
-// ARMv8.2-A FP16 intrinsics.
-let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {
-
+// ARMv8.2-A FP16 intrinsics for A32/A64.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)" in {
   // Negate
   def VNEGSH          : SInst<"vneg", "ss", "Sh">;
 
-  // Reciprocal/Sqrt
-  def SCALAR_FRECPSH  : IInst<"vrecps", "sss", "Sh">;
+  // Sqrt
   def FSQRTSH         : SInst<"vsqrt", "ss", "Sh">;
-  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
+
+  // Rounding
+  def FRINTZ_S64H     : SInst<"vrnd", "ss", "Sh">;
+  def FRINTA_S64H     : SInst<"vrnda", "ss", "Sh">;
+  def FRINTI_S64H     : SInst<"vrndi", "ss", "Sh">;
+  def FRINTM_S64H     : SInst<"vrndm", "ss", "Sh">;
+  def FRINTN_S64H     : SInst<"vrndn", "ss", "Sh">;
+  def FRINTP_S64H     : SInst<"vrndp", "ss", "Sh">;
+  def FRINTX_S64H     : SInst<"vrndx", "ss", "Sh">;
+
+  // Conversion
+  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "Ys", "iUi">;
+  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
+  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
+  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
+  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
+  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
+  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
+  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
+  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
+  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
+  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
+  let isVCVT_N = 1 in {
+    def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "iUi">;
+    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
+    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
+  }
+
+  // Scalar Absolute Value
+  def SCALAR_ABSH     : SInst<"vabs", "ss", "Sh">;
+
+  // Add/Sub
+  def VADDSH          : SInst<"vadd", "sss", "Sh">;
+  def VSUBHS          : SInst<"vsub", "sss", "Sh">;
+
+  // Max/Min(nm)
+  def FMAXNMHS        : SInst<"vmaxnm", "sss", "Sh">;
+  def FMINNMHS        : SInst<"vminnm", "sss", "Sh">;
+
+  // Multiplication/Division
+  def VMULHS          : SInst<"vmul", "sss", "Sh">;
+  def FDIVHS          : SInst<"vdiv", "sss", "Sh">;
+
+  // Vector fused multiply-add operations
+  def VFMAHS          : SInst<"vfma", "ssss", "Sh">;
+  def VFMSHS          : SInst<"vfms", "ssss", "Sh">;
+}
+
+// ARMv8.2-A FP16 intrinsics for A64 only.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {
   // Reciprocal Estimate
   def SCALAR_FRECPEH  : IInst<"vrecpe", "ss", "Sh">;
@@ -34,67 +81,51 @@
   // Reciprocal Square Root Estimate
   def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">;
 
-  // Rounding
-  def FRINTZ_S64H     : SInst<"vrnd", "ss", "Sh">;
-  def FRINTA_S64H     : SInst<"vrnda", "ss", "Sh">;
-  def FRINTI_S64H     : SInst<"vrndi", "ss", "Sh">;
-  def FRINTM_S64H     : SInst<"vrndm", "ss", "Sh">;
-  def FRINTN_S64H     : SInst<"vrndn", "ss", "Sh">;
-  def FRINTP_S64H     : SInst<"vrndp", "ss", "Sh">;
-  def FRINTX_S64H     : SInst<"vrndx", "ss", "Sh">;
+  // Reciprocal
+  def SCALAR_FRECPSH  : IInst<"vrecps", "sss", "Sh">;
+  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
+
+  // Comparison
+  def SCALAR_CMEQRH   : SInst<"vceq", "bss", "Sh">;
+  def SCALAR_CMEQZH   : SInst<"vceqz", "bs", "Sh">;
+  def SCALAR_CMGERH   : SInst<"vcge", "bss", "Sh">;
+  def SCALAR_CMGEZH   : SInst<"vcgez", "bs", "Sh">;
+  def SCALAR_CMGTRH   : SInst<"vcgt", "bss", "Sh">;
+  def SCALAR_CMGTZH   : SInst<"vcgtz", "bs", "Sh">;
+  def SCALAR_CMLERH   : SInst<"vcle", "bss", "Sh">;
+  def SCALAR_CMLEZH   : SInst<"vclez", "bs", "Sh">;
+  def SCALAR_CMLTH    : SInst<"vclt", "bss", "Sh">;
+  def SCALAR_CMLTZH   : SInst<"vcltz", "bs", "Sh">;
 
   // Conversion
-  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "Ys", "silUsUiUl">;
+  def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "Ys", "slUsUl">;
   def SCALAR_FCVTZSH  : SInst<"vcvt_s16", "$s", "Sh">;
-  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
   def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">;
   def SCALAR_FCVTZUH  : SInst<"vcvt_u16", "bs", "Sh">;
-  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
   def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">;
   def SCALAR_FCVTASH  : SInst<"vcvta_s16", "$s", "Sh">;
-  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
   def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">;
   def SCALAR_FCVTAUH  : SInst<"vcvta_u16", "bs", "Sh">;
-  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
   def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">;
   def SCALAR_FCVTMSH  : SInst<"vcvtm_s16", "$s", "Sh">;
-  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
   def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">;
   def SCALAR_FCVTMUH  : SInst<"vcvtm_u16", "bs", "Sh">;
-  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
   def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">;
   def SCALAR_FCVTNSH  : SInst<"vcvtn_s16", "$s", "Sh">;
-  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
   def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">;
   def SCALAR_FCVTNUH  : SInst<"vcvtn_u16", "bs", "Sh">;
-  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
   def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">;
   def SCALAR_FCVTPSH  : SInst<"vcvtp_s16", "$s", "Sh">;
-  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
   def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">;
   def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "bs", "Sh">;
-  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
   def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">;
   let isVCVT_N = 1 in {
-    def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">;
+    def SCALAR_SCVTFSHO1: SInst<"vcvth_n_f16", "Ysi", "slUsUl">;
     def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">;
-    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
     def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">;
     def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">;
-    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
     def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">;
   }
 
-  // Comparison
-  def SCALAR_CMEQRH   : SInst<"vceq", "bss", "Sh">;
-  def SCALAR_CMEQZH   : SInst<"vceqz", "bs", "Sh">;
-  def SCALAR_CMGERH   : SInst<"vcge", "bss", "Sh">;
-  def SCALAR_CMGEZH   : SInst<"vcgez", "bs", "Sh">;
-  def SCALAR_CMGTRH   : SInst<"vcgt", "bss", "Sh">;
-  def SCALAR_CMGTZH   : SInst<"vcgtz", "bs", "Sh">;
-  def SCALAR_CMLERH   : SInst<"vcle", "bss", "Sh">;
-  def SCALAR_CMLEZH   : SInst<"vclez", "bs", "Sh">;
-  def SCALAR_CMLTH    : SInst<"vclt", "bss", "Sh">;
-  def SCALAR_CMLTZH   : SInst<"vcltz", "bs", "Sh">;
 
   // Absolute Compare Mask Greater Than Or Equal
   def SCALAR_FACGEH   : IInst<"vcage", "bss", "Sh">;
@@ -104,28 +135,13 @@
   def SCALAR_FACGT    : IInst<"vcagt", "bss", "Sh">;
   def SCALAR_FACLT    : IInst<"vcalt", "bss", "Sh">;
 
-  // Scalar Absolute Value
-  def SCALAR_ABSH     : SInst<"vabs", "ss", "Sh">;
-
   // Scalar Absolute Difference
   def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">;
 
-  // Add/Sub
-  def VADDSH          : SInst<"vadd", "sss", "Sh">;
-  def VSUBHS          : SInst<"vsub", "sss", "Sh">;
-
   // Max/Min
   def VMAXHS          : SInst<"vmax", "sss", "Sh">;
   def VMINHS          : SInst<"vmin", "sss", "Sh">;
-  def FMAXNMHS        : SInst<"vmaxnm", "sss", "Sh">;
-  def FMINNMHS        : SInst<"vminnm", "sss", "Sh">;
 
-  // Multiplication/Division
-  def VMULHS          : SInst<"vmul", "sss", "Sh">;
+  // Multiplication
   def MULXHS          : SInst<"vmulx", "sss", "Sh">;
-  def FDIVHS          : SInst<"vdiv", "sss", "Sh">;
-
-  // Vector fused multiply-add operations
-  def VFMAHS          : SInst<"vfma", "ssss", "Sh">;
-  def VFMSHS          : SInst<"vfms", "ssss", "Sh">;
 }
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -4221,6 +4221,21 @@
   NEONMAP0(vzipq_v)
 };
 
+static const NeonIntrinsicInfo ARMSISDIntrinsicMap[] = {
+  NEONMAP1(vcvtah_s32_f16, arm_neon_vcvtas, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtah_u32_f16, arm_neon_vcvtau, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_f16_s32, arm_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_f16_u32, arm_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_s32_f16, arm_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_u32_f16, arm_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmh_s32_f16, arm_neon_vcvtms, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmh_u32_f16, arm_neon_vcvtmu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnh_s32_f16, arm_neon_vcvtns, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnh_u32_f16, arm_neon_vcvtnu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtph_s32_f16, arm_neon_vcvtps, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtph_u32_f16, arm_neon_vcvtpu, AddRetType | Add1ArgType),
+};
+
 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
@@ -5906,6 +5921,121 @@
     }
   }
 
+  // Emit the ARM SISD builtins with identical semantics.
+  auto SISDMap = makeArrayRef(ARMSISDIntrinsicMap);
+  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(SISDMap, BuiltinID,
+                                         AArch64SISDIntrinsicsProvenSorted);
+  if (Builtin) {
+    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
+    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
+    assert(Result && "SISD intrinsic should have been handled");
+    return Result;
+  }
+
+  // fp16 scalar intrinsics
+  bool usgn = false;
+  switch (BuiltinID) {
+  default: break;
+  case NEON::BI__builtin_neon_vabsh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
+  case NEON::BI__builtin_neon_vcvth_u32_f16:
+    usgn = true;
+    // FALL THROUGH
+  case NEON::BI__builtin_neon_vcvth_s32_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+    if (usgn)
+      return Builder.CreateFPToUI(Ops[0], Int32Ty);
+    return Builder.CreateFPToSI(Ops[0], Int32Ty);
+  }
+  case NEON::BI__builtin_neon_vcvth_f16_u32:
+    usgn = true;
+    // FALL THROUGH
+  case NEON::BI__builtin_neon_vcvth_f16_s32: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = Builder.CreateBitCast(Ops[0], Int32Ty);
+    if (usgn)
+      return Builder.CreateUIToFP(Ops[0], HalfTy);
+    return Builder.CreateSIToFP(Ops[0], HalfTy);
+  }
+  case NEON::BI__builtin_neon_vnegh_f16:
+    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
+  case NEON::BI__builtin_neon_vrndh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::trunc;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
+  }
+  case NEON::BI__builtin_neon_vrndah_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::round;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
+  }
+  case NEON::BI__builtin_neon_vrndih_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::nearbyint;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
+  }
+  case NEON::BI__builtin_neon_vrndmh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::floor;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
+  }
+  case NEON::BI__builtin_neon_vrndnh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::arm_neon_vrintn;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
+  }
+  case NEON::BI__builtin_neon_vrndph_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::ceil;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
+  }
+  case NEON::BI__builtin_neon_vrndxh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::rint;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
+  }
+  case NEON::BI__builtin_neon_vsqrth_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    unsigned Int = Intrinsic::sqrt;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
+  }
+  case NEON::BI__builtin_neon_vaddh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
+  case NEON::BI__builtin_neon_vsubh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
+  case NEON::BI__builtin_neon_vmulh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
+  case NEON::BI__builtin_neon_vdivh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
+  case NEON::BI__builtin_neon_vminnmh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    unsigned Int = Intrinsic::arm_neon_vminnm;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
+  }
+  case NEON::BI__builtin_neon_vmaxnmh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    unsigned Int = Intrinsic::arm_neon_vmaxnm;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
+  }
+  case NEON::BI__builtin_neon_vfmah_f16: {
+    Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+    return Builder.CreateCall(F,
+        {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
+  }
+  case NEON::BI__builtin_neon_vfmsh_f16: {
+    Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
+    Value *Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
+    return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
+  }
+  }
+
   switch (BuiltinID) {
   default: break;
@@ -6013,7 +6143,7 @@
 
   // Determine the type of this overloaded NEON intrinsic.
   NeonTypeFlags Type(Result.getZExtValue());
-  bool usgn = Type.isUnsigned();
+  usgn = Type.isUnsigned();
   bool rightShift = false;
 
   llvm::VectorType *VTy = GetNeonType(this, Type,
@@ -6025,7 +6155,7 @@
   // Many NEON builtins have identical semantics and uses in ARM and
   // AArch64. Emit these in a single function.
   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
-  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
+  Builtin = findNeonIntrinsicInMap(
       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
   if (Builtin)
     return EmitCommonNeonBuiltinExpr(
Index: clang/test/CodeGen/arm-v8.2a-fp16-intrinsics.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/arm-v8.2a-fp16-intrinsics.c
@@ -0,0 +1,225 @@
+// RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \
+// RUN:  -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN:  | opt -S -mem2reg \
+// RUN:  | FileCheck %s
+
+// REQUIRES: arm-registered-target
+
+#include <arm_fp16.h>
+
+// CHECK-LABEL: test_vabsh_f16
+// CHECK: [[ABS:%.*]] = call half @llvm.fabs.f16(half %{{.*}})
+float16_t test_vabsh_f16(float16_t a) {
+  return vabsh_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_s32
+// CHECK: [[VCVT:%.*]] = sitofp i32 %a to half
+float16_t test_vcvth_f16_s32 (int32_t a) {
+  return vcvth_f16_s32(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_u32
+// CHECK: [[VCVT:%.*]] = uitofp i32 %a to half
+float16_t test_vcvth_f16_u32 (uint32_t a) {
+  return vcvth_f16_u32(a);
+}
+
+// CHECK-LABEL: test_vcvth_s32_f16
+// CHECK: [[VCVT:%.*]] = fptosi half %{{.*}} to i32
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvth_s32_f16 (float16_t a) {
+  return vcvth_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_u32_f16
+// CHECK: [[VCVT:%.*]] = fptoui half %{{.*}} to i32
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvth_u32_f16 (float16_t a) {
+  return vcvth_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtas.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtah_s32_f16 (float16_t a) {
+  return vcvtah_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtau.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtah_u32_f16 (float16_t a) {
+  return vcvtah_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtms.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtmh_s32_f16 (float16_t a) {
+  return vcvtmh_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtmu.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtmh_u32_f16 (float16_t a) {
+  return vcvtmh_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtns.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtnh_s32_f16 (float16_t a) {
+  return vcvtnh_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtnu.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtnh_u32_f16 (float16_t a) {
+  return vcvtnh_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtps.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtph_s32_f16 (float16_t a) {
+  return vcvtph_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.arm.neon.vcvtpu.i32.f16(half %{{.*}})
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtph_u32_f16 (float16_t a) {
+  return vcvtph_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vnegh_f16
+// CHECK: [[NEG:%.*]] = fsub half 0xH8000, %a
+float16_t test_vnegh_f16(float16_t a) {
+  return vnegh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndh_f16
+// CHECK: [[RND:%.*]] = call half @llvm.trunc.f16(half %{{.*}})
+float16_t test_vrndh_f16(float16_t a) {
+  return vrndh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndah_f16
+// CHECK: [[RND:%.*]] = call half @llvm.round.f16(half %{{.*}})
+float16_t test_vrndah_f16(float16_t a) {
+  return vrndah_f16(a);
+}
+
+// CHECK-LABEL: test_vrndih_f16
+// CHECK: [[RND:%.*]] = call half @llvm.nearbyint.f16(half %{{.*}})
+float16_t test_vrndih_f16(float16_t a) {
+  return vrndih_f16(a);
+}
+
+// CHECK-LABEL: test_vrndmh_f16
+// CHECK: [[RND:%.*]] = call half @llvm.floor.f16(half %{{.*}})
+float16_t test_vrndmh_f16(float16_t a) {
+  return vrndmh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndnh_f16
+// CHECK: [[RND:%.*]] = call half @llvm.arm.neon.vrintn.f16(half %{{.*}})
+float16_t test_vrndnh_f16(float16_t a) {
+  return vrndnh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndph_f16
+// CHECK: [[RND:%.*]] = call half @llvm.ceil.f16(half %{{.*}})
+float16_t test_vrndph_f16(float16_t a) {
+  return vrndph_f16(a);
+}
+
+// CHECK-LABEL: test_vrndxh_f16
+// CHECK: [[RND:%.*]] = call half @llvm.rint.f16(half %{{.*}})
+float16_t test_vrndxh_f16(float16_t a) {
+  return vrndxh_f16(a);
+}
+
+// CHECK-LABEL: test_vsqrth_f16
+// CHECK: [[SQR:%.*]] = call half @llvm.sqrt.f16(half %{{.*}})
+float16_t test_vsqrth_f16(float16_t a) {
+  return vsqrth_f16(a);
+}
+
+// CHECK-LABEL: test_vaddh_f16
+// CHECK: [[ADD:%.*]] = fadd half [[A:%.*]], [[B:%.*]]
+float16_t test_vaddh_f16(float16_t a, float16_t b) {
+  return vaddh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_s32
+// CHECK: [[CVT:%.*]] = call half @llvm.arm.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
+float16_t test_vcvth_n_f16_s32(int32_t a) {
+  return vcvth_n_f16_s32(a, 1);
+}
+
+// CHECK-LABEL: test_vcvth_n_s32_f16
+// CHECK: [[CVT:%.*]] = call i32 @llvm.arm.neon.vcvtfp2fxs.i32.f16(half {{%.*}}, i32 1)
+// CHECK: ret i32 [[CVT]]
+int32_t test_vcvth_n_s32_f16(float16_t a) {
+  return vcvth_n_s32_f16(a, 1);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_u32
+// CHECK: [[CVT:%.*]] = call half @llvm.arm.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
+float16_t test_vcvth_n_f16_u32(uint32_t a) {
+  return vcvth_n_f16_u32(a, 1);
+}
+
+// CHECK-LABEL: test_vcvth_n_u32_f16
+// CHECK: [[CVT:%.*]] = call i32 @llvm.arm.neon.vcvtfp2fxu.i32.f16(half {{%.*}}, i32 1)
+// CHECK: ret i32 [[CVT]]
+uint32_t test_vcvth_n_u32_f16(float16_t a) {
+  return vcvth_n_u32_f16(a, 1);
+}
+
+// CHECK-LABEL: test_vdivh_f16
+// CHECK: [[DIV:%.*]] = fdiv half [[A:%.*]], [[B:%.*]]
+float16_t test_vdivh_f16(float16_t a, float16_t b) {
+  return vdivh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxnmh_f16
+// CHECK: [[MAX:%.*]] = call half @llvm.arm.neon.vmaxnm.f16(half [[A:%.*]], half [[B:%.*]])
+float16_t test_vmaxnmh_f16(float16_t a, float16_t b) {
+  return vmaxnmh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminnmh_f16
+// CHECK: [[MIN:%.*]] = call half @llvm.arm.neon.vminnm.f16(half [[A:%.*]], half [[B:%.*]])
+float16_t test_vminnmh_f16(float16_t a, float16_t b) {
+  return vminnmh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulh_f16
+// CHECK: [[MUL:%.*]] = fmul half [[A:%.*]], [[B:%.*]]
+float16_t test_vmulh_f16(float16_t a, float16_t b) {
+  return vmulh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vsubh_f16
+// CHECK: [[SUB:%.*]] = fsub half [[A:%.*]], [[B:%.*]]
+float16_t test_vsubh_f16(float16_t a, float16_t b) {
+  return vsubh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vfmah_f16
+// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half [[A:%.*]], half [[B:%.*]], half [[C:%.*]])
+float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
+  return vfmah_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmsh_f16
+// CHECK: [[SUB:%.*]] = fsub half 0xH8000, %b
+// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
+float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
+  return vfmsh_f16(a, b, c);
+}
Index: llvm/include/llvm/IR/IntrinsicsARM.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsARM.td
+++ llvm/include/llvm/IR/IntrinsicsARM.td
@@ -355,6 +355,9 @@
 class Neon_2Arg_Intrinsic
     : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
                 [IntrNoMem]>;
+class Float_2Arg_Intrinsic
+    : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+                [IntrNoMem]>;
 class Neon_2Arg_Narrow_Intrinsic
     : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>, LLVMExtendedType<0>],
                 [IntrNoMem]>;
@@ -377,8 +380,8 @@
     : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
 class Neon_CvtFPToFx_Intrinsic
     : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
-class Neon_CvtFPtoInt_1Arg_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+class CvtFPtoInt_1Arg_Intrinsic
+    : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
 class Neon_Compare_Intrinsic
     : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
                 [IntrNoMem]>;
@@ -431,12 +434,12 @@
 // Vector Maximum.
 def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
 def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vmaxnm : Neon_2Arg_Intrinsic;
+def int_arm_neon_vmaxnm : Float_2Arg_Intrinsic;
 
 // Vector Minimum.
 def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
 def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vminnm : Neon_2Arg_Intrinsic;
+def int_arm_neon_vminnm : Float_2Arg_Intrinsic;
 
 // Vector Reciprocal Step.
 def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;
@@ -552,15 +555,15 @@
 // Vector Reciprocal Square Root Estimate.
 def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;
 
-// Vector Conversions Between Floating-point and Integer
-def int_arm_neon_vcvtau : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtas : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtnu : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtns : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtpu : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtps : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtmu : Neon_CvtFPtoInt_1Arg_Intrinsic;
-def int_arm_neon_vcvtms : Neon_CvtFPtoInt_1Arg_Intrinsic;
+// Conversions Between Floating-point and Integer
+def int_arm_neon_vcvtau : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtas : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtnu : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtns : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtpu : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtps : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtmu : CvtFPtoInt_1Arg_Intrinsic;
+def int_arm_neon_vcvtms : CvtFPtoInt_1Arg_Intrinsic;
 
 // Vector Conversions Between Floating-point and Fixed-point.
 def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
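A quick way to sanity-check the A32 exposure by hand, beyond the lit test above, is a small end-to-end program. The sketch below is hypothetical and not part of the patch: it assumes a clang built with this change and an Armv8.2-A FP16 target (e.g. --target=armv8.2a-linux-gnueabihf -march=armv8.2-a+fp16), and the helper name fp16_demo is invented for illustration.

// Hypothetical example, not part of this patch: exercises a few of the
// scalar FP16 intrinsics that this change makes available on A32.
#include <arm_fp16.h>
#include <stdio.h>

static float16_t fp16_demo(float16_t a, float16_t b, float16_t c) {
  float16_t prod  = vmulh_f16(a, b);          // scalar half-precision multiply
  float16_t fused = vfmah_f16(c, a, b);       // c + a * b, single rounding step
  float16_t hi    = vmaxnmh_f16(prod, fused); // IEEE maxNum semantics
  return vrndnh_f16(hi);                      // round to nearest, ties to even
}

int main(void) {
  float16_t r = fp16_demo((float16_t)1.5f, (float16_t)2.0f, (float16_t)0.25f);
  printf("%f\n", (double)(float)r); // widen for printf; no half format specifier
  return 0;
}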