diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -15000,16 +15000,31 @@ // FullFP16: half values are passed in S-registers, and we don't // need any of the bitcast and moves: // - // t2: f32,ch = CopyFromReg t0, Register:f32 %0 + // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl? // t5: i32 = bitcast t2 // t18: f16 = ARMISD::VMOVhr t5 + // => + // tN: f16,ch2,gl2? = CopyFromReg ch, Register:f32 %0, gl? if (Op0->getOpcode() == ISD::BITCAST) { SDValue Copy = Op0->getOperand(0); if (Copy.getValueType() == MVT::f32 && Copy->getOpcode() == ISD::CopyFromReg) { - SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)}; + bool HasGlue = Copy->getNumOperands() == 3; + SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1), + HasGlue ? Copy->getOperand(2) : SDValue()}; + EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue}; SDValue NewCopy = - DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), N->getValueType(0), Ops); + DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), + DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)), + ArrayRef(Ops, HasGlue ? 3 : 2)); + + // Update Users, Chains, and Potential Glue. 
+ DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0)); + DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1)); + if (HasGlue) + DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2), + NewCopy.getValue(2)); + return NewCopy; } } diff --git a/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll b/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll --- a/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll +++ b/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll @@ -59,8 +59,10 @@ ; FP16-HARD: @ %bb.0: ; FP16-HARD: vmov.f32 s16, s0 ; FP16-HARD: bl fp16_inner +; FP16-HARD: vmov.f32 s18, s0 ; FP16-HARD: vmov.f32 s0, s16 ; FP16-HARD: bl other +; FP16-HARD: vmov.f32 s0, s18 %call = call half @fp16_inner() %call1 = call float @other(float %arg) ret half %call @@ -105,13 +107,13 @@ ; FP16-HARD: @ %bb.0: ; FP16-HARD: vmov.f32 s16, s0 ; FP16-HARD: bl fp16_inner +; FP16-HARD: vmov.f32 s18, s0 ; FP16-HARD: vmov.f32 s0, s16 ; FP16-HARD: bl other -; FP16-HARD: vmov.f16 r0, s0 -; FP16-HARD: vmov.f32 s16, s0 +; FP16-HARD: vmov.f16 r0, s18 ; FP16-HARD: vmov s0, r0 ; FP16-HARD: bl fp16_sink -; FP16-HARD: vmov.f32 s0, s16 +; FP16-HARD: vmov.f32 s0, s18 %call = call half @fp16_inner() %call1 = call float @other(float %arg) call void @fp16_sink(half %call)