Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -70,9 +70,13 @@
     case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
     case ISD::BITCAST:     R = SoftenFloatRes_BITCAST(N, ResNo); break;
     case ISD::BUILD_PAIR:  R = SoftenFloatRes_BUILD_PAIR(N); break;
+    case ISD::BUILD_VECTOR:
+      R = SoftenFloatRes_BUILD_VECTOR(N); break;
     case ISD::ConstantFP:  R = SoftenFloatRes_ConstantFP(N, ResNo); break;
     case ISD::EXTRACT_VECTOR_ELT:
       R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
+    case ISD::INSERT_VECTOR_ELT:
+      R = SoftenFloatRes_INSERT_VECTOR_ELT(N); break;
     case ISD::FABS:        R = SoftenFloatRes_FABS(N, ResNo); break;
     case ISD::FMINNUM:     R = SoftenFloatRes_FMINNUM(N); break;
     case ISD::FMAXNUM:     R = SoftenFloatRes_FMAXNUM(N); break;
@@ -144,6 +148,18 @@
                      BitConvertToInteger(N->getOperand(1)));
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_VECTOR(SDNode *N) {
+  // Run every operand through BitConvertToInteger.
+  SmallVector<SDValue, 8> IntOps;
+  for (SDValue Op : N->op_values())
+    IntOps.push_back(BitConvertToInteger(Op));
+
+  // Rebuild the vector with the type the target transforms the original
+  // result type to.
+  EVT SoftVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), SoftVT, IntOps);
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
   // When LegalInHWReg, we can load better from the constant pool.
   if (isLegalInHWReg(N->getValueType(ResNo)))
@@ -181,6 +197,13 @@
                      NewOp, N->getOperand(1));
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatRes_INSERT_VECTOR_ELT(SDNode *N) {
+  SDValue Vec = BitConvertVectorToIntegerVector(N->getOperand(0));
+  SDValue Elt = BitConvertToInteger(N->getOperand(1));
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), Vec.getValueType(), Vec,
+                     Elt, N->getOperand(2)); // operand 2 is forwarded as-is
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) {
   // When LegalInHWReg, FABS can be implemented as native bitwise operations.
   if (isLegalInHWReg(N->getValueType(ResNo)))
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -470,8 +470,10 @@
   SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+  SDValue SoftenFloatRes_BUILD_VECTOR(SDNode *N);
   SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
+  SDValue SoftenFloatRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_FMINNUM(SDNode *N);
   SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -1165,6 +1165,22 @@
     bool IsLegalWiderType = false;
     LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
     switch (PreferredAction) {
+    case TypeSoftenFloat: { // Try the integer vector of the same shape.
+      MVT SoftEltVT = MVT::getIntegerVT(EltVT.getSizeInBits());
+      MVT SoftVT = MVT::getVectorVT(SoftEltVT, NElts);
+      if (isTypeLegal(SoftVT))  {
+        unsigned ToInd = (unsigned)SoftVT.SimpleTy;
+        assert(ToInd < i && "FP types precede integer types in MVT?");
+        TransformToType[i] = SoftVT;
+        RegisterTypeForVT[i] = RegisterTypeForVT[ToInd];
+        NumRegistersForVT[i] = NumRegistersForVT[ToInd];
+        ValueTypeActions.setTypeAction(VT, TypeSoftenFloat);
+        break;
+      }
+      // SoftVT is not legal: let the next case handle this type instead.
+      LLVM_FALLTHROUGH;
+    }
+
     case TypePromoteInteger:
       // Try to promote the elements of integer vectors. If no legal
       // promotion was found, fall through to the widen-vector method.
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -497,6 +497,9 @@
     bool functionArgumentNeedsConsecutiveRegisters(
         Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
 
+    TargetLoweringBase::LegalizeTypeAction
+    getPreferredVectorAction(EVT VT) const override;
+
     /// If a physical register, this returns the register that receives the
     /// exception address on entry to an EH pad.
     unsigned
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -14846,6 +14846,13 @@
   return IsHA || IsIntArray;
 }
 
+TargetLoweringBase::LegalizeTypeAction
+ARMTargetLowering::getPreferredVectorAction(EVT VT) const {
+  const bool IsFP16 = VT.isFloatingPoint() && VT.getScalarSizeInBits() == 16;
+  return IsFP16 ? TypeSoftenFloat // FP types with 16-bit scalars are softened
+                : TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
 unsigned ARMTargetLowering::getExceptionPointerRegister(
     const Constant *PersonalityFn) const {
   // Platforms which do not use SjLj EH may return values in these registers
Index: test/CodeGen/ARM/fp16-promote.ll
===================================================================
--- test/CodeGen/ARM/fp16-promote.ll
+++ test/CodeGen/ARM/fp16-promote.ll
@@ -820,15 +820,15 @@
 ; CHECK-ALL-LABEL: test_insertelement:
 ; CHECK-ALL: sub sp, sp, #8
 
-; CHECK-VFP:	and	
-; CHECK-VFP:	mov	
-; CHECK-VFP:	ldrd	
-; CHECK-VFP:	orr	
-; CHECK-VFP:	ldrh	
-; CHECK-VFP:	stm	
-; CHECK-VFP:	strh	
-; CHECK-VFP:	ldm	
-; CHECK-VFP:	stm	
+; CHECK-VFP: and
+; CHECK-VFP: mov
+; CHECK-VFP: vldr
+; CHECK-VFP: orr
+; CHECK-VFP: ldrh
+; CHECK-VFP: vstr
+; CHECK-VFP: strh
+; CHECK-VFP: vldr
+; CHECK-VFP: vstr
 
 ; CHECK-NOVFP: ldrh
 ; CHECK-NOVFP: ldrh
@@ -860,15 +860,15 @@
 }
 
 ; CHECK-ALL-LABEL: test_extractelement:
-; CHECK-VFP: push {{{.*}}, lr}
 ; CHECK-VFP: sub sp, sp, #8
-; CHECK-VFP: ldrd
+; CHECK-VFP: vldr
+; CHECK-VFP: and
 ; CHECK-VFP: mov
 ; CHECK-VFP: orr
+; CHECK-VFP: vstr
 ; CHECK-VFP: ldrh
 ; CHECK-VFP: strh
 ; CHECK-VFP: add sp, sp, #8
-; CHECK-VFP: pop {{{.*}}, pc}
 ; CHECK-NOVFP: ldrh
 ; CHECK-NOVFP: strh
 ; CHECK-NOVFP: ldrh
Index: test/CodeGen/ARM/fp16-soften.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/fp16-soften.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7-none--eabi"
+
+@v = local_unnamed_addr global <4 x half> zeroinitializer, align 8
+
+declare void @callee(<4 x half>) #0
+; A <4 x half> loaded from @v must reach @callee in r0/r1 via vldr + vmov.
+; CHECK-LABEL: test_soften:
+; CHECK: vldr [[DREG:d[0-9]+]], {{\[r[0-9]+]}}
+; CHECK-NEXT: vmov r0, r1, [[DREG]]
+; CHECK-NEXT: b callee
+define void @test_soften() #0 {
+entry:
+  %0 = load <4 x half>, <4 x half>* @v, align 8
+  tail call void (<4 x half>) @callee(<4 x half> %0)
+  ret void
+}
+
+attributes #0 = { nounwind }
Index: test/CodeGen/ARM/fp16-v3.ll
===================================================================
--- test/CodeGen/ARM/fp16-v3.ll
+++ test/CodeGen/ARM/fp16-v3.ll
@@ -11,10 +11,8 @@
 ; CHECK: vadd.f32 [[SREG5:s[0-9]+]], [[SREG4]], [[SREG1]]
 ; CHECK-NEXT: vcvtb.f16.f32 [[SREG6:s[0-9]+]], [[SREG5]]
 ; CHECK-NEXT: vmov [[RREG1:r[0-9]+]], [[SREG6]]
-; CHECK-DAG: uxth [[RREG2:r[0-9]+]], [[RREG1]]
-; CHECK-DAG: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16
 ; CHECK-DAG: strh [[RREG1]], [r0, #4]
-; CHECK-DAG: vmov [[DREG:d[0-9]+]], [[RREG3]], [[RREG2]]
+; CHECK-DAG: vdup.16 [[DREG:d[0-9]+]], [[RREG1]]
 ; CHECK-DAG: vst1.32 {[[DREG]][0]}, [r0:32]
 ; CHECK-NEXT: bx lr
 define void @test_vec3(<3 x half>* %arr, i32 %i) #0 {
@@ -30,9 +28,11 @@
 ; CHECK-LABEL: test_bitcast:
 ; CHECK: vcvtb.f16.f32
 ; CHECK: vcvtb.f16.f32
+; CHECK: vmov.16
 ; CHECK: vcvtb.f16.f32
-; CHECK: pkhbt
-; CHECK: uxth
+; CHECK: vmov.16
+; CHECK: vst1.32
+; CHECK: strh
 define void @test_bitcast(<3 x half> %inp, <3 x i16>* %arr) #0 {
   %bc = bitcast <3 x half> %inp to <3 x i16>
   store <3 x i16> %bc, <3 x i16>* %arr, align 8
Index: test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
===================================================================
--- test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
+++ test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
@@ -121,12 +121,12 @@
   br label %for.body
 
 ; VF_4-LABEL: Checking a loop in "half_factor_2"
-; VF_4:         Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
+; VF_4:         Found an estimated cost of 33 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
 ; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2
 ; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
 ; VF_4-NEXT:    Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
 ; VF_8-LABEL: Checking a loop in "half_factor_2"
-; VF_8:         Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
+; VF_8:         Found an estimated cost of 66 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
 ; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2
 ; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
 ; VF_8-NEXT:    Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2