Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1767,6 +1767,10 @@
     case ISD::SELECT_CC:  R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
     case ISD::SETCC:      R = PromoteFloatOp_SETCC(N, OpNo); break;
     case ISD::STORE:      R = PromoteFloatOp_STORE(N, OpNo); break;
+    case ISD::BUILD_VECTOR: R = PromoteFloatOp_BUILD_VECTOR(N, OpNo); break;
+    case ISD::INSERT_VECTOR_ELT:
+      R = PromoteFloatOp_INSERT_VECTOR_ELT(N, OpNo);
+      break;
   }
 
   if (R.getNode())
@@ -1861,6 +1865,29 @@
                       ST->getMemOperand());
 }
 
+// Build the vector from the operands' integer bits, then bitcast it back.
+SDValue DAGTypeLegalizer::PromoteFloatOp_BUILD_VECTOR(SDNode *N,
+                                                      unsigned OpNo) {
+  SDLoc DL(N);
+  EVT VecVT = N->getValueType(0);
+  SmallVector<SDValue, 8> IntOps;
+  for (const SDValue &Elt : N->op_values())
+    IntOps.push_back(BitConvertToInteger(Elt));
+  EVT IntVT = VecVT.changeVectorElementTypeToInteger();
+  SDValue IntVec = DAG.getNode(ISD::BUILD_VECTOR, DL, IntVT, IntOps);
+  return DAG.getNode(ISD::BITCAST, DL, VecVT, IntVec);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_INSERT_VECTOR_ELT(SDNode *N,
+                                                           unsigned OpNo) {
+  SDLoc DL(N); // Attribute every node built here to N, not to an operand.
+  SDValue IntVec = BitConvertVectorToIntegerVector(N->getOperand(0));
+  SDValue IntElem = BitConvertToInteger(N->getOperand(1));
+  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVec.getValueType(),
+                            IntVec, IntElem, N->getOperand(2));
+  return DAG.getNode(ISD::BITCAST, DL, N->getValueType(0), Res); // N's type
+}
+
 //===----------------------------------------------------------------------===//
 //  Float Result Promotion
 //===----------------------------------------------------------------------===//
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -628,6 +628,8 @@
   SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_BUILD_VECTOR(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
 
   //===--------------------------------------------------------------------===//
   // Scalarization Support: LegalizeVectorTypes.cpp
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -609,6 +609,11 @@
     void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
     void addDRTypeForNEON(MVT VT);
     void addQRTypeForNEON(MVT VT);
+    /// Expand all operations (except loads, stores and basic arithmetic)
+    /// for a given FP type
+    void setFPFunctionsExpand(MVT VT);
+    /// Expand all operations (except loads and stores) for a given FP type
+    void setFPOperationsExpand(MVT VT);
     std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const;
 
     using RegsToPassVector = SmallVector<std::pair<unsigned, SDValue>, 8>;
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -220,6 +220,24 @@
   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
 }
 
+// Mark the FP math-function and rounding opcodes listed below Expand for VT.
+void ARMTargetLowering::setFPFunctionsExpand(MVT VT) {
+  for (ISD::NodeType Opcode :
+       {ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
+        ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC, ISD::FRINT,
+        ISD::FNEARBYINT, ISD::FFLOOR})
+    setOperationAction(Opcode, VT, Expand);
+}
+
+// Expand FP arithmetic, sign and compare opcodes plus all FP functions.
+void ARMTargetLowering::setFPOperationsExpand(MVT VT) {
+  for (ISD::NodeType Opcode :
+       {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FREM,
+        ISD::FCOPYSIGN, ISD::FGETSIGN, ISD::SETCC, ISD::FNEG, ISD::FABS})
+    setOperationAction(Opcode, VT, Expand);
+  setFPFunctionsExpand(VT);
+}
+
 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
                                      const ARMSubtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {
@@ -561,79 +579,35 @@
     addQRTypeForNEON(MVT::v4i32);
     addQRTypeForNEON(MVT::v2i64);
 
-    if (Subtarget->hasFullFP16()) {
-      addQRTypeForNEON(MVT::v8f16);
-      addDRTypeForNEON(MVT::v4f16);
+    // Even if the target does not support FP16 operations, we want to keep
+    // <4 x half> and <8 x half> legal, because they can still be used as
+    // storage types and must be handled correctly when passed as function
+    // parameters (the calling convention requires them to be treated as
+    // containerized vectors).
+    addQRTypeForNEON(MVT::v8f16);
+    addDRTypeForNEON(MVT::v4f16);
+    if (!Subtarget->hasFullFP16()) {
+      setFPOperationsExpand(MVT::v8f16);
+      setFPOperationsExpand(MVT::v4f16);
     }
 
     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
     // neither Neon nor VFP support any arithmetic operations on it.
     // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
     // supported for v4f32.
-    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
-    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
-    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
-    // FIXME: Code duplication: FDIV and FREM are expanded always, see
-    // ARMTargetLowering::addTypeForNEON method for details.
-    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
-    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
-    // FIXME: Create unittest.
+    // FIXME: Create unittest for FCOPYSIGN.
     // In another words, find a way when "copysign" appears in DAG with vector
     // operands.
-    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
     // FIXME: Code duplication: SETCC has custom operation action, see
     // ARMTargetLowering::addTypeForNEON method for details.
-    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
     // FIXME: Create unittest for FNEG and for FABS.
-    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
-    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
-    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
-    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
-    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
-    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
-    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
-    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
-    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
-    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
-    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
     // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
-    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
-    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
-    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
-    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
-    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
-    setOperationAction(ISD::FMA, MVT::v2f64, Expand);
-
-    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
-    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
-    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
-    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
-    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
-    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
-    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
-    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
-    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
-    setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
-    setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
-    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
-    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
-    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
+    setFPOperationsExpand(MVT::v2f64);
+
+    setFPFunctionsExpand(MVT::v4f32);
 
     // Mark v2f32 intrinsics.
-    setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
-    setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
-    setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
-    setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
-    setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
-    setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
-    setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
-    setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
-    setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
-    setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
-    setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
-    setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
-    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
-    setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
+    setFPFunctionsExpand(MVT::v2f32);
 
     // Neon does not support some operations on v1i64 and v2i64 types.
     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -732,30 +706,7 @@
     // operations, f64 is legal for the few double-precision instructions which
     // are present However, no double-precision operations other than moves,
     // loads and stores are provided by the hardware.
-    setOperationAction(ISD::FADD,       MVT::f64, Expand);
-    setOperationAction(ISD::FSUB,       MVT::f64, Expand);
-    setOperationAction(ISD::FMUL,       MVT::f64, Expand);
-    setOperationAction(ISD::FMA,        MVT::f64, Expand);
-    setOperationAction(ISD::FDIV,       MVT::f64, Expand);
-    setOperationAction(ISD::FREM,       MVT::f64, Expand);
-    setOperationAction(ISD::FCOPYSIGN,  MVT::f64, Expand);
-    setOperationAction(ISD::FGETSIGN,   MVT::f64, Expand);
-    setOperationAction(ISD::FNEG,       MVT::f64, Expand);
-    setOperationAction(ISD::FABS,       MVT::f64, Expand);
-    setOperationAction(ISD::FSQRT,      MVT::f64, Expand);
-    setOperationAction(ISD::FSIN,       MVT::f64, Expand);
-    setOperationAction(ISD::FCOS,       MVT::f64, Expand);
-    setOperationAction(ISD::FPOW,       MVT::f64, Expand);
-    setOperationAction(ISD::FLOG,       MVT::f64, Expand);
-    setOperationAction(ISD::FLOG2,      MVT::f64, Expand);
-    setOperationAction(ISD::FLOG10,     MVT::f64, Expand);
-    setOperationAction(ISD::FEXP,       MVT::f64, Expand);
-    setOperationAction(ISD::FEXP2,      MVT::f64, Expand);
-    setOperationAction(ISD::FCEIL,      MVT::f64, Expand);
-    setOperationAction(ISD::FTRUNC,     MVT::f64, Expand);
-    setOperationAction(ISD::FRINT,      MVT::f64, Expand);
-    setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
-    setOperationAction(ISD::FFLOOR,     MVT::f64, Expand);
+    setFPOperationsExpand(MVT::f64);
     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
Index: test/CodeGen/ARM/fp16-promote.ll
===================================================================
--- test/CodeGen/ARM/fp16-promote.ll
+++ test/CodeGen/ARM/fp16-promote.ll
@@ -820,15 +820,15 @@
 ; CHECK-ALL-LABEL: test_insertelement:
 ; CHECK-ALL: sub sp, sp, #8
 
-; CHECK-VFP:	and	
-; CHECK-VFP:	mov	
-; CHECK-VFP:	ldrd	
-; CHECK-VFP:	orr	
-; CHECK-VFP:	ldrh	
-; CHECK-VFP:	stm	
-; CHECK-VFP:	strh	
-; CHECK-VFP:	ldm	
-; CHECK-VFP:	stm	
+; CHECK-VFP: and
+; CHECK-VFP: mov
+; CHECK-VFP: vldr
+; CHECK-VFP: orr
+; CHECK-VFP: ldrh
+; CHECK-VFP: vstr
+; CHECK-VFP: strh
+; CHECK-VFP: vldr
+; CHECK-VFP: vstr
 
 ; CHECK-NOVFP: ldrh
 ; CHECK-NOVFP: ldrh
@@ -860,15 +860,15 @@
 }
 
 ; CHECK-ALL-LABEL: test_extractelement:
-; CHECK-VFP: push {{{.*}}, lr}
 ; CHECK-VFP: sub sp, sp, #8
-; CHECK-VFP: ldrd
+; CHECK-VFP: vldr
+; CHECK-VFP: and
 ; CHECK-VFP: mov
 ; CHECK-VFP: orr
+; CHECK-VFP: vstr
 ; CHECK-VFP: ldrh
 ; CHECK-VFP: strh
 ; CHECK-VFP: add sp, sp, #8
-; CHECK-VFP: pop {{{.*}}, pc}
 ; CHECK-NOVFP: ldrh
 ; CHECK-NOVFP: strh
 ; CHECK-NOVFP: ldrh
Index: test/CodeGen/ARM/fp16-v3.ll
===================================================================
--- test/CodeGen/ARM/fp16-v3.ll
+++ test/CodeGen/ARM/fp16-v3.ll
@@ -11,10 +11,8 @@
 ; CHECK: vadd.f32 [[SREG5:s[0-9]+]], [[SREG4]], [[SREG1]]
 ; CHECK-NEXT: vcvtb.f16.f32 [[SREG6:s[0-9]+]], [[SREG5]]
 ; CHECK-NEXT: vmov [[RREG1:r[0-9]+]], [[SREG6]]
-; CHECK-DAG: uxth [[RREG2:r[0-9]+]], [[RREG1]]
-; CHECK-DAG: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16
 ; CHECK-DAG: strh [[RREG1]], [r0, #4]
-; CHECK-DAG: vmov [[DREG:d[0-9]+]], [[RREG3]], [[RREG2]]
+; CHECK-DAG: vdup.16 [[DREG:d[0-9]+]], [[RREG1]]
 ; CHECK-DAG: vst1.32 {[[DREG]][0]}, [r0:32]
 ; CHECK-NEXT: bx lr
 define void @test_vec3(<3 x half>* %arr, i32 %i) #0 {
@@ -28,11 +26,9 @@
 }
 
 ; CHECK-LABEL: test_bitcast:
-; CHECK: vcvtb.f16.f32
-; CHECK: vcvtb.f16.f32
-; CHECK: vcvtb.f16.f32
-; CHECK: pkhbt
-; CHECK: uxth
+; CHECK-DAG: vst1.16
+; CHECK-DAG: vst1.32
+; CHECK: bx lr
 define void @test_bitcast(<3 x half> %inp, <3 x i16>* %arr) #0 {
   %bc = bitcast <3 x half> %inp to <3 x i16>
   store <3 x i16> %bc, <3 x i16>* %arr, align 8
Index: test/CodeGen/ARM/vfp16-calling-conv.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/vfp16-calling-conv.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7-none--eabi"
+
+@v = local_unnamed_addr global <4 x half> zeroinitializer, align 8
+
+declare void @callee(<4 x half>) #0
+
+; CHECK-LABEL: test_soften:
+; CHECK: vldr [[DREG:d[0-9]+]], {{\[r[0-9]+]}}
+; CHECK-NEXT: vmov r0, r1, [[DREG]]
+; CHECK-NEXT: b callee
+define void @test_soften() #0 {
+entry:
+  %0 = load <4 x half>, <4 x half>* @v, align 8 ; whole vector read from @v
+  tail call void (<4 x half>) @callee(<4 x half> %0) ; forwarded unmodified
+  ret void
+}
+
+; CHECK-LABEL: test_illegal_op:
+; CHECK: vadd.f32
+; CHECK: vadd.f32
+; CHECK: vadd.f32
+; CHECK: vadd.f32
+; CHECK: b callee
+define void @test_illegal_op(<4 x half> %a, <4 x half> %b) #0 {
+  %c = fadd <4 x half> %a, %b ; elementwise half add of the two arguments
+  tail call void (<4 x half>) @callee(<4 x half> %c) ; sum passed to @callee
+  ret void
+}
+
+attributes #0 = { nounwind }
Index: test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
===================================================================
--- test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
+++ test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
@@ -121,12 +121,12 @@
   br label %for.body
 
 ; VF_4-LABEL: Checking a loop in "half_factor_2"
-; VF_4:         Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
+; VF_4:         Found an estimated cost of 33 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
 ; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2
 ; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
 ; VF_4-NEXT:    Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
 ; VF_8-LABEL: Checking a loop in "half_factor_2"
-; VF_8:         Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
+; VF_8:         Found an estimated cost of 66 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
 ; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2
 ; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
 ; VF_8-NEXT:    Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2