Index: lib/Target/ARM/ARMCallingConv.td
===================================================================
--- lib/Target/ARM/ARMCallingConv.td
+++ lib/Target/ARM/ARMCallingConv.td
@@ -20,7 +20,7 @@
 
   // Handles byval parameters.
   CCIfByVal<CCPassByVal<4, 4>>,
-    
+
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
 
   // Pass SwiftSelf in a callee saved register.
@@ -214,8 +214,8 @@
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
-  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
-                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCIfType<[f16, f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                      S9, S10, S11, S12, S13, S14, S15]>>,
   CCDelegateTo<CC_ARM_AAPCS_Common>
 ]>;
 
@@ -232,7 +232,7 @@
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
-  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
-                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCIfType<[f16, f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                      S9, S10, S11, S12, S13, S14, S15]>>,
   CCDelegateTo<RetCC_ARM_AAPCS_Common>
 ]>;
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -530,6 +530,9 @@
     addRegisterClass(MVT::f64, &ARM::DPRRegClass);
   }
 
+  if (Subtarget->hasFullFP16())
+    addRegisterClass(MVT::f16, &ARM::HPRRegClass);
+
   for (MVT VT : MVT::vector_valuetypes()) {
     for (MVT InnerVT : MVT::vector_valuetypes()) {
       setTruncStoreAction(VT, InnerVT, Expand);
@@ -3699,7 +3702,9 @@
       } else {
         const TargetRegisterClass *RC;
 
-        if (RegVT == MVT::f32)
+        if (RegVT == MVT::f16)
+          RC = &ARM::HPRRegClass;
+        else if (RegVT == MVT::f32)
           RC = &ARM::SPRRegClass;
         else if (RegVT == MVT::f64)
           RC = &ARM::DPRRegClass;
Index: lib/Target/ARM/ARMInstrVFP.td
===================================================================
--- lib/Target/ARM/ARMInstrVFP.td
+++ lib/Target/ARM/ARMInstrVFP.td
@@ -355,9 +355,9 @@
 
 let TwoOperandAliasConstraint = "$Sn = $Sd" in
 def VADDH  : AHbI<0b11100, 0b11, 0, 0,
-                  (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                  (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                   IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
-                  []>,
+                  [(set HPR:$Sd, (fadd HPR:$Sn, HPR:$Sm))]>,
              Sched<[WriteFPALU32]>;
 
 let TwoOperandAliasConstraint = "$Dn = $Dd" in
@@ -380,9 +380,9 @@
 
 let TwoOperandAliasConstraint = "$Sn = $Sd" in
 def VSUBH  : AHbI<0b11100, 0b11, 1, 0,
-                  (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                  (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                   IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
-                  []>,
+                  [(set HPR:$Sd, (fsub HPR:$Sn, HPR:$Sm))]>,
             Sched<[WriteFPALU32]>;
 
 let TwoOperandAliasConstraint = "$Dn = $Dd" in
Index: lib/Target/ARM/ARMRegisterInfo.td
===================================================================
--- lib/Target/ARM/ARMRegisterInfo.td
+++ lib/Target/ARM/ARMRegisterInfo.td
@@ -296,6 +296,23 @@
   }];
 }
 
+// Half-precision (FullFP16) register class. It is exactly the same as the
+// single-precision class and uses the same S-registers. Each instruction that
+// generates an FP16 result writes it to the bottom 16 bits of the associated
+// 32-bit floating-point register and writes 0 to the top 16 bits. A separate
+// register class is added, rather than adding f16 to SPR, to avoid modifying
+// the existing rules and adding type information to them.
+def HPR : RegisterClass<"ARM", [f16], 32, (sequence "S%u", 0, 31)> {
+  let AltOrders = [(add (decimate SPR, 2), SPR),
+                   (add (decimate SPR, 4),
+                        (decimate SPR, 2),
+                        (decimate (rotl SPR, 1), 4),
+                        (decimate (rotl SPR, 1), 2))];
+  let AltOrderSelect = [{
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+  }];
+}
+
 // Subset of SPR which can be used as a source of NEON scalars for 16-bit
 // operations
 def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
Index: lib/Target/ARM/Disassembler/ARMDisassembler.cpp
===================================================================
--- lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -158,6 +158,8 @@
                                    uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -182,6 +184,8 @@
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeHPRRegListOperand(MCInst &Inst, unsigned Val,
+                               uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
@@ -996,6 +1000,11 @@
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder) {
+  return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
 static const uint16_t DPRDecoderTable[] = {
      ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
      ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,
@@ -1253,6 +1262,11 @@
   return S;
 }
 
+static DecodeStatus DecodeHPRRegListOperand(MCInst &Inst, unsigned Val,
+                                 uint64_t Address, const void *Decoder) {
+  return DecodeSPRRegListOperand(Inst, Val, Address, Decoder);
+}
+
 static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
Index: test/CodeGen/ARM/fp16-instructions.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/fp16-instructions.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefix=CHECK-SOFT
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 -float-abi=hard | FileCheck %s --check-prefix=CHECK-FP16
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon,+fullfp16 -float-abi=hard | FileCheck %s --check-prefix=CHECK-FULLFP16
+
+define half @Sub(half %a, half %b) local_unnamed_addr {
+entry:
+;CHECK-SOFT-LABEL:      Sub:
+;CHECK-SOFT:            bl  __aeabi_h2f
+;CHECK-SOFT:            bl  __aeabi_h2f
+;CHECK-SOFT:            bl  __aeabi_fsub
+;CHECK-SOFT:            bl  __aeabi_f2h
+
+;CHECK-FP16-LABEL:      Sub:
+;CHECK-FP16:            vsub.f32  s0, s0, s2
+;CHECK-FP16-NEXT:       mov pc, lr
+
+;CHECK-FULLFP16-LABEL:  Sub:
+;CHECK-FULLFP16:        vsub.f16  s0, s0, s1
+;CHECK-FULLFP16-NEXT:   mov pc, lr
+
+  %sub = fsub half %a, %b
+  ret half %sub
+}
+
+define half @Add(half %a, half %b) local_unnamed_addr {
+entry:
+;CHECK-SOFT-LABEL:      Add:
+;CHECK-SOFT:            bl  __aeabi_h2f
+;CHECK-SOFT:            bl  __aeabi_h2f
+;CHECK-SOFT:            bl  __aeabi_fadd
+;CHECK-SOFT:            bl  __aeabi_f2h
+
+;CHECK-FP16-LABEL:      Add:
+;CHECK-FP16:            vadd.f32  s0, s0, s2
+;CHECK-FP16-NEXT:       mov pc, lr
+
+;CHECK-FULLFP16-LABEL:  Add:
+;CHECK-FULLFP16:        vadd.f16  s0, s0, s1
+;CHECK-FULLFP16-NEXT:   mov pc, lr
+
+  %add = fadd half %a, %b
+  ret half %add
+}