Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -418,6 +418,15 @@
       SoftenFloatResult(Op.getNode(), 0);
   }
 
+  if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
+    Op = GetPromotedFloat(Op);
+    // If the promotion did the FP_EXTEND to the destination type for us,
+    // there's nothing left to do here.
+    if (Op.getValueType() == N->getValueType(0)) {
+      return BitConvertToInteger(Op);
+    }
+  }
+
   RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
   if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
     Op = GetSoftenedFloat(Op);
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -198,6 +198,14 @@
   if (PartEVT == ValueVT)
     return Val;
 
+  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
+      ValueVT.bitsLT(PartEVT)) {
+    // For an FP value in an integer part, we need to truncate to the right
+    // width first.
+    PartEVT = EVT::getIntegerVT(*DAG.getContext(),  ValueVT.getSizeInBits());
+    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
+  }
+
   if (PartEVT.isInteger() && ValueVT.isInteger()) {
     if (ValueVT.bitsLT(PartEVT)) {
       // For a truncate, see if we have any information to
@@ -384,6 +392,12 @@
       assert(NumParts == 1 && "Do not know what to promote to!");
       Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
     } else {
+      if (ValueVT.isFloatingPoint()) {
+        // FP values need to be bitcast, then extended if they are being put
+        // into a larger container.
+        ValueVT = EVT::getIntegerVT(*DAG.getContext(),  ValueVT.getSizeInBits());
+        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+      }
       assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
              ValueVT.isInteger() &&
              "Unknown mismatch!");
Index: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
+++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
@@ -1277,20 +1277,14 @@
     ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
   }
 
+  // Decide how to handle f16. If the target does not have native f16 support,
+  // promote it to f32, because there are no f16 library calls (except for
+  // conversions).
   if (!isTypeLegal(MVT::f16)) {
-    // If the target has native f32 support, promote f16 operations to f32.  If
-    // f32 is not supported, generate soft float library calls.
-    if (isTypeLegal(MVT::f32)) {
-      NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
-      RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
-      TransformToType[MVT::f16] = MVT::f32;
-      ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
-    } else {
-      NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
-      RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
-      TransformToType[MVT::f16] = MVT::i16;
-      ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat);
-    }
+    NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
+    RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
+    TransformToType[MVT::f16] = MVT::f32;
+    ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
   }
 
   // Loop over all of the vector value types to see which need transformations.
Index: llvm/trunk/test/CodeGen/ARM/fp16-promote.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/fp16-promote.ll
+++ llvm/trunk/test/CodeGen/ARM/fp16-promote.ll
@@ -1,18 +1,18 @@
-; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 -check-prefix=CHECK-ALL
-; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16  --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL --check-prefix=CHECK-LIBCALL-VFP
+; RUN: llc -asm-verbose=false < %s -mattr=-vfp2 | FileCheck %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
 target triple = "armv7---eabihf"
 
-; CHECK-FP16-LABEL: test_fadd:
+; CHECK-ALL-LABEL: test_fadd:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vadd.f32
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fadd:
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vadd.f32
+; CHECK-VFP: vadd.f32
+; CHECK-NOVFP: bl __aeabi_fadd
+; CHECK-FP16: vcvtb.f16.f32
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_fadd(half* %p, half* %q) #0 {
   %a = load half, half* %p, align 2
@@ -22,15 +22,14 @@
   ret void
 }
 
-; CHECK-FP16-LABEL: test_fsub:
+; CHECK-ALL-LABEL: test_fsub:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vsub.f32
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fsub:
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vsub.f32
+; CHECK-VFP: vsub.f32
+; CHECK-NOVFP: bl __aeabi_fsub
+; CHECK-FP16: vcvtb.f16.f32
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_fsub(half* %p, half* %q) #0 {
   %a = load half, half* %p, align 2
@@ -40,15 +39,14 @@
   ret void
 }
 
-; CHECK-FP16-LABEL: test_fmul:
+; CHECK-ALL-LABEL: test_fmul:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vmul.f32
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fmul
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vmul.f32
+; CHECK-VFP: vmul.f32
+; CHECK-NOVFP: bl __aeabi_fmul
+; CHECK-FP16: vcvtb.f16.f32
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_fmul(half* %p, half* %q) #0 {
   %a = load half, half* %p, align 2
@@ -58,15 +56,14 @@
   ret void
 }
 
-; CHECK-FP16-LABEL: test_fdiv:
+; CHECK-ALL-LABEL: test_fdiv:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vdiv.f32
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fdiv
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vdiv.f32
+; CHECK-VFP: vdiv.f32
+; CHECK-NOVFP: bl __aeabi_fdiv
+; CHECK-FP16: vcvtb.f16.f32
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_fdiv(half* %p, half* %q) #0 {
   %a = load half, half* %p, align 2
@@ -76,15 +73,13 @@
   ret void
 }
 
-; CHECK-FP16-LABEL: test_frem:
+; CHECK-ALL-LABEL: test_frem:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: bl fmodf
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_frem
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl fmodf
+; CHECK-FP16: vcvtb.f16.f32
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_frem(half* %p, half* %q) #0 {
   %a = load half, half* %p, align 2
@@ -96,9 +91,8 @@
 
 ; CHECK-ALL-LABEL: test_load_store:
 ; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: ldrh r0, [r0]
-; CHECK-ALL-NEXT: strh r0, [r1]
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
+; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
 define void @test_load_store(half* %p, half* %q) #0 {
   %a = load half, half* %p, align 2
   store half %a, half* %q
@@ -125,9 +119,12 @@
 ; CHECK-ALL-NEXT: .fnstart
 ; CHECK-ALL-NEXT: .save {r11, lr}
 ; CHECK-ALL-NEXT: push {r11, lr}
-; CHECK-ALL-NEXT: vmov.f32 s2, s0
-; CHECK-ALL-NEXT: vmov.f32 s0, s1
-; CHECK-ALL-NEXT: vmov.f32 s1, s2
+; CHECK-VFP-NEXT: vmov.f32 s2, s0
+; CHECK-VFP-NEXT: vmov.f32 s0, s1
+; CHECK-VFP-NEXT: vmov.f32 s1, s2
+; CHECK-NOVFP-NEXT: mov r2, r0
+; CHECK-NOVFP-NEXT: mov r0, r1
+; CHECK-NOVFP-NEXT: mov r1, r2
 ; CHECK-ALL-NEXT: bl test_callee
 ; CHECK-ALL-NEXT: pop {r11, pc}
 define half @test_call_flipped(half %a, half %b) #0 {
@@ -137,9 +134,12 @@
 
 ; CHECK-ALL-LABEL: test_tailcall_flipped:
 ; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: vmov.f32 s2, s0
-; CHECK-ALL-NEXT: vmov.f32 s0, s1
-; CHECK-ALL-NEXT: vmov.f32 s1, s2
+; CHECK-VFP-NEXT: vmov.f32 s2, s0
+; CHECK-VFP-NEXT: vmov.f32 s0, s1
+; CHECK-VFP-NEXT: vmov.f32 s1, s2
+; CHECK-NOVFP-NEXT: mov r2, r0
+; CHECK-NOVFP-NEXT: mov r0, r1
+; CHECK-NOVFP-NEXT: mov r1, r2
 ; CHECK-ALL-NEXT: b test_callee
 define half @test_tailcall_flipped(half %a, half %b) #0 {
   %r = tail call half @test_callee(half %b, half %a)
@@ -149,12 +149,10 @@
 ; Optimizer picks %p or %q based on %c and only loads that value
 ; No conversion is needed
 ; CHECK-ALL-LABEL: test_select:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: cmp r2, #0
-; CHECK-ALL-NEXT: movne r1, r0
-; CHECK-ALL-NEXT: ldrh r1, [r1]
-; CHECK-ALL-NEXT: strh r1, [r0]
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: cmp {{r[0-9]+}}, #0
+; CHECK-ALL: movne {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
+; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
 define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
   %a = load half, half* %p, align 2
   %b = load half, half* %q, align 2
@@ -165,17 +163,15 @@
 
 ; Test only two variants of fcmp.  These get translated to f32 vcmpe
 ; instructions anyway.
-; CHECK-FP16-LABEL: test_fcmp_une:
+; CHECK-ALL-LABEL: test_fcmp_une:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcmpe.f32
-; CHECK-FP16: vmrs APSR_nzcv, fpscr
-; CHECK-FP16: movwne
-; CHECK-LIBCALL-LABEL: test_fcmp_une:
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcmpe.f32
-; CHECK-LIBCALL: movwne
+; CHECK-VFP: vcmpe.f32
+; CHECK-NOVFP: bl __aeabi_fcmpeq
+; CHECK-FP16: vmrs APSR_nzcv, fpscr
+; CHECK-ALL: movw{{ne|eq}}
 define i1 @test_fcmp_une(half* %p, half* %q) #0 {
   %a = load half, half* %p, align 2
   %b = load half, half* %q, align 2
@@ -183,18 +179,15 @@
   ret i1 %r
 }
 
-; CHECK-FP16-LABEL: test_fcmp_ueq:
+; CHECK-ALL-LABEL: test_fcmp_ueq:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcmpe.f32
-; CHECK-FP16: vmrs APSR_nzcv, fpscr
-; CHECK-FP16: movweq
-; CHECK-FP16: movwvs
-; CHECK-LIBCALL-LABEL: test_fcmp_ueq:
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcmpe.f32
-; CHECK-LIBCALL: movweq
+; CHECK-VFP: vcmpe.f32
+; CHECK-NOVFP: bl __aeabi_fcmpeq
+; CHECK-FP16: vmrs APSR_nzcv, fpscr
+; CHECK-LIBCALL: movw{{ne|eq}}
 define i1 @test_fcmp_ueq(half* %p, half* %q) #0 {
   %a = load half, half* %p, align 2
   %b = load half, half* %q, align 2
@@ -202,19 +195,18 @@
   ret i1 %r
 }
 
-; CHECK-FP16-LABEL: test_br_cc:
+; CHECK-ALL-LABEL: test_br_cc:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcmpe.f32
-; CHECK-FP16: vmrs APSR_nzcv, fpscr
-; CHECK-FP16: strmi
-; CHECK-FP16: strpl
-; CHECK-LIBCALL-LABEL: test_br_cc:
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcmpe.f32
-; CHECK-LIBCALL: strmi
-; CHECK-LIBCALL: strpl
+; CHECK-VFP: vcmpe.f32
+; CHECK-NOVFP: bl __aeabi_fcmplt
+; CHECK-FP16: vmrs APSR_nzcv, fpscr
+; CHECK-VFP: strmi
+; CHECK-VFP: strpl
+; CHECK-NOVFP: strne
+; CHECK-NOVFP: streq
 define void @test_br_cc(half* %p, half* %q, i32* %p1, i32* %p2) #0 {
   %a = load half, half* %p, align 2
   %b = load half, half* %q, align 2
@@ -229,20 +221,19 @@
 }
 
 declare i1 @test_dummy(half* %p) #0
-; CHECK-FP16-LABEL: test_phi:
+; CHECK-ALL-LABEL: test_phi:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: [[LOOP:.LBB[1-9_]+]]:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: bl      test_dummy
 ; CHECK-FP16: bne     [[LOOP]]
 ; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_phi:
-; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL-VFP: bl __aeabi_h2f
 ; CHECK-LIBCALL: [[LOOP:.LBB[1-9_]+]]:
-; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL-VFP: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl test_dummy
 ; CHECK-LIBCALL: bne     [[LOOP]]
-; CHECK-LIBCALL: bl __aeabi_f2h
+; CHECK-LIBCALL-VFP: bl __aeabi_f2h
 define void @test_phi(half* %p) #0 {
 entry:
   %a = load half, half* %p
@@ -257,59 +248,52 @@
   ret void
 }
 
-; CHECK-FP16-LABEL: test_fptosi_i32:
+; CHECK-ALL-LABEL: test_fptosi_i32:
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvt.s32.f32
-; CHECK-LIBCALL-LABEL: test_fptosi_i32:
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcvt.s32.f32
+; CHECK-VFP: vcvt.s32.f32
+; CHECK-NOVFP: bl __aeabi_f2iz
 define i32 @test_fptosi_i32(half* %p) #0 {
   %a = load half, half* %p, align 2
   %r = fptosi half %a to i32
   ret i32 %r
 }
 
-; CHECK-FP16-LABEL: test_fptosi_i64:
+; CHECK-ALL-LABEL: test_fptosi_i64:
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: bl __aeabi_f2lz
-; CHECK-LIBCALL-LABEL: test_fptosi_i64:
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: bl __aeabi_f2lz
+; CHECK-ALL: bl __aeabi_f2lz
 define i64 @test_fptosi_i64(half* %p) #0 {
   %a = load half, half* %p, align 2
   %r = fptosi half %a to i64
   ret i64 %r
 }
 
-; CHECK-FP16-LABEL: test_fptoui_i32:
+; CHECK-ALL-LABEL: test_fptoui_i32:
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvt.u32.f32
-; CHECK-LIBCALL-LABEL: test_fptoui_i32:
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcvt.u32.f32
+; CHECK-VFP: vcvt.u32.f32
+; CHECK-NOVFP: bl __aeabi_f2uiz
 define i32 @test_fptoui_i32(half* %p) #0 {
   %a = load half, half* %p, align 2
   %r = fptoui half %a to i32
   ret i32 %r
 }
 
-; CHECK-FP16-LABEL: test_fptoui_i64:
+; CHECK-ALL-LABEL: test_fptoui_i64:
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: bl __aeabi_f2ulz
-; CHECK-LIBCALL-LABEL: test_fptoui_i64:
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: bl __aeabi_f2ulz
+; CHECK-ALL: bl __aeabi_f2ulz
 define i64 @test_fptoui_i64(half* %p) #0 {
   %a = load half, half* %p, align 2
   %r = fptoui half %a to i64
   ret i64 %r
 }
 
-; CHECK-FP16-LABEL: test_sitofp_i32:
-; CHECK-FP16: vcvt.f32.s32
+; CHECK-ALL-LABEL: test_sitofp_i32:
+; CHECK-VFP: vcvt.f32.s32
+; CHECK-NOVFP: bl __aeabi_i2f
 ; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_sitofp_i32:
-; CHECK-LIBCALL: vcvt.f32.s32
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_sitofp_i32(i32 %a, half* %p) #0 {
   %r = sitofp i32 %a to half
@@ -317,11 +301,10 @@
   ret void
 }
 
-; CHECK-FP16-LABEL: test_uitofp_i32:
-; CHECK-FP16: vcvt.f32.u32
+; CHECK-ALL-LABEL: test_uitofp_i32:
+; CHECK-VFP: vcvt.f32.u32
+; CHECK-NOVFP: bl __aeabi_ui2f
 ; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_uitofp_i32:
-; CHECK-LIBCALL: vcvt.f32.u32
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_uitofp_i32(i32 %a, half* %p) #0 {
   %r = uitofp i32 %a to half
@@ -329,11 +312,9 @@
   ret void
 }
 
-; CHECK-FP16-LABEL: test_sitofp_i64:
-; CHECK-FP16: bl __aeabi_l2f
+; CHECK-ALL-LABEL: test_sitofp_i64:
+; CHECK-ALL: bl __aeabi_l2f
 ; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_sitofp_i64:
-; CHECK-LIBCALL: bl __aeabi_l2f
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_sitofp_i64(i64 %a, half* %p) #0 {
   %r = sitofp i64 %a to half
@@ -341,11 +322,9 @@
   ret void
 }
 
-; CHECK-FP16-LABEL: test_uitofp_i64:
-; CHECK-FP16: bl __aeabi_ul2f
+; CHECK-ALL-LABEL: test_uitofp_i64:
+; CHECK-ALL: bl __aeabi_ul2f
 ; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_uitofp_i64:
-; CHECK-LIBCALL: bl __aeabi_ul2f
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_uitofp_i64(i64 %a, half* %p) #0 {
   %r = uitofp i64 %a to half
@@ -385,10 +364,10 @@
 
 ; CHECK-FP16-LABEL: test_fpextend_double:
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvt.f64.f32
 ; CHECK-LIBCALL-LABEL: test_fpextend_double:
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcvt.f64.f32
+; CHECK-VFP: vcvt.f64.f32
+; CHECK-NOVFP: bl __aeabi_f2d
 define double @test_fpextend_double(half* %p) {
   %a = load half, half* %p, align 2
   %r = fpext half %a to double
@@ -438,13 +417,13 @@
 declare half @llvm.round.f16(half %a) #0
 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
 
-; CHECK-FP16-LABEL: test_sqrt:
+; CHECK-ALL-LABEL: test_sqrt:
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-FP16: vsqrt.f32
 ; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_sqrt:
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vsqrt.f32
+; CHECK-VFP-LIBCALL: vsqrt.f32
+; CHECK-NOVFP: bl sqrtf
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_sqrt(half* %p) #0 {
   %a = load half, half* %p, align 2
@@ -671,7 +650,10 @@
 ; CHECK-LIBCALL-LABEL: test_copysign:
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vbsl
+; CHECK-VFP-LIBCALL: vbsl
+; CHECK-NOVFP: bfc
+; CHECK-NOVFP: and
+; CHECK-NOVFP: orr
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_copysign(half* %p, half* %q) #0 {
   %a = load half, half* %p, align 2
@@ -781,7 +763,8 @@
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vmla.f32
+; CHECK-VFP-LIBCALL: vmla.f32
+; CHECK-NOVFP: bl __aeabi_fmul
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
   %a = load half, half* %p, align 2
@@ -797,31 +780,28 @@
 ; and extractelement have these extra loads and stores.
 
 ; CHECK-ALL-LABEL: test_insertelement:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: .pad #8
-; CHECK-ALL-NEXT: sub sp, sp, #8
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: mov
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: add
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: add sp, sp, #8
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: sub sp, sp, #8
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: mov
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: add
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: add sp, sp, #8
 define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
   %a = load half, half* %p, align 2
   %b = load <4 x half>, <4 x half>* %q, align 8
@@ -831,23 +811,30 @@
 }
 
 ; CHECK-ALL-LABEL: test_extractelement:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: .pad #8
-; CHECK-ALL-NEXT: sub sp, sp, #8
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: orr
-; CHECK-ALL-NEXT: str
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: orr
-; CHECK-ALL-NEXT: str
-; CHECK-ALL-NEXT: mov
-; CHECK-ALL-NEXT: add
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: add sp, sp, #8
-; CHECK-ALL-NEXT: bx lr
+; CHECK-VFP: sub sp, sp, #8
+; CHECK-VFP: ldrh
+; CHECK-VFP: ldrh
+; CHECK-VFP: orr
+; CHECK-VFP: str
+; CHECK-VFP: ldrh
+; CHECK-VFP: ldrh
+; CHECK-VFP: orr
+; CHECK-VFP: str
+; CHECK-VFP: mov
+; CHECK-VFP: add
+; CHECK-VFP: ldrh
+; CHECK-VFP: strh
+; CHECK-VFP: add sp, sp, #8
+; CHECK-VFP: bx lr
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
 define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 {
   %a = load <4 x half>, <4 x half>* %q, align 8
   %b = extractelement <4 x half> %a, i32 %i
@@ -860,12 +847,10 @@
 %struct.dummy = type { i32, half }
 
 ; CHECK-ALL-LABEL: test_insertvalue:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: ldr
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: str
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL-DAG: ldr
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: str
 define void @test_insertvalue(%struct.dummy* %p, half* %q) {
   %a = load %struct.dummy, %struct.dummy* %p
   %b = load half, half* %q
@@ -875,10 +860,9 @@
 }
 
 ; CHECK-ALL-LABEL: test_extractvalue:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: .fnstart
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
 define void @test_extractvalue(%struct.dummy* %p, half* %q) {
   %a = load %struct.dummy, %struct.dummy* %p
   %b = extractvalue %struct.dummy %a, 1
@@ -886,10 +870,11 @@
   ret void
 }
 
-; CHECK-FP16-LABEL: test_struct_return:
+; CHECK-ALL-LABEL: test_struct_return:
 ; CHECK-FP16: vcvtb.f32.f16
-; CHECK-LIBCALL-LABEL: test_struct_return:
-; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP-LIBCALL: bl __aeabi_h2f
+; CHECK-NOVFP-DAG: ldr
+; CHECK-NOVFP-DAG: ldrh
 define %struct.dummy @test_struct_return(%struct.dummy* %p) {
   %a = load %struct.dummy, %struct.dummy* %p
   ret %struct.dummy %a
@@ -897,6 +882,7 @@
 
 ; CHECK-ALL-LABEL: test_struct_arg:
 ; CHECK-ALL-NEXT: .fnstart
+; CHECK-NOVFP-NEXT: mov r0, r1
 ; CHECK-ALL-NEXT: bx lr
 define half @test_struct_arg(%struct.dummy %p) {
   %a = extractvalue %struct.dummy %p, 1