diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -15000,16 +15000,31 @@
   // FullFP16: half values are passed in S-registers, and we don't
   // need any of the bitcast and moves:
   //
-  //     t2: f32,ch = CopyFromReg t0, Register:f32 %0
+  //     t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
   //   t5: i32 = bitcast t2
   // t18: f16 = ARMISD::VMOVhr t5
+  // =>
+  // tN: f16,ch2,gl2? = CopyFromReg ch, Register:f32 %0, gl?
   if (Op0->getOpcode() == ISD::BITCAST) {
     SDValue Copy = Op0->getOperand(0);
     if (Copy.getValueType() == MVT::f32 &&
         Copy->getOpcode() == ISD::CopyFromReg) {
-      SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)};
+      bool HasGlue = Copy->getNumOperands() == 3;
+      SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
+                       HasGlue ? Copy->getOperand(2) : SDValue()};
+      EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
       SDValue NewCopy =
-          DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), N->getValueType(0), Ops);
+          DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N),
+                          DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
+                          ArrayRef(Ops, HasGlue ? 3 : 2));
+
+      // Redirect uses of the old value, chain, and (if present) glue.
+      DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
+      DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
+      if (HasGlue)
+        DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
+                                          NewCopy.getValue(2));
+
       return NewCopy;
     }
   }
diff --git a/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll b/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll
--- a/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll
+++ b/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll
@@ -59,8 +59,10 @@
 ; FP16-HARD:  @ %bb.0:
 ; FP16-HARD:    vmov.f32 s16, s0
 ; FP16-HARD:    bl fp16_inner
+; FP16-HARD:    vmov.f32 s18, s0
 ; FP16-HARD:    vmov.f32 s0, s16
 ; FP16-HARD:    bl other
+; FP16-HARD:    vmov.f32 s0, s18
   %call = call half @fp16_inner()
   %call1 = call float @other(float %arg)
   ret half %call
@@ -105,13 +107,13 @@
 ; FP16-HARD:  @ %bb.0:
 ; FP16-HARD:    vmov.f32 s16, s0
 ; FP16-HARD:    bl fp16_inner
+; FP16-HARD:    vmov.f32 s18, s0
 ; FP16-HARD:    vmov.f32 s0, s16
 ; FP16-HARD:    bl other
-; FP16-HARD:    vmov.f16 r0, s0
-; FP16-HARD:    vmov.f32 s16, s0
+; FP16-HARD:    vmov.f16 r0, s18
 ; FP16-HARD:    vmov s0, r0
 ; FP16-HARD:    bl fp16_sink
-; FP16-HARD:    vmov.f32 s0, s16
+; FP16-HARD:    vmov.f32 s0, s18
   %call = call half @fp16_inner()
   %call1 = call float @other(float %arg)
   call void @fp16_sink(half %call)