diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13030,13 +13030,18 @@
 }
 
 static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue Op0 = N->getOperand(0);
+
+  // VMOVhr (VMOVrh (X)) -> X
+  if (Op0->getOpcode() == ARMISD::VMOVrh)
+    return Op0->getOperand(0);
+
   // FullFP16: half values are passed in S-registers, and we don't
   // need any of the bitcast and moves:
   //
   // t2: f32,ch = CopyFromReg t0, Register:f32 %0
   // t5: i32 = bitcast t2
   // t18: f16 = ARMISD::VMOVhr t5
-  SDValue Op0 = N->getOperand(0);
   if (Op0->getOpcode() == ISD::BITCAST) {
     SDValue Copy = Op0->getOperand(0);
     if (Copy.getValueType() == MVT::f32 &&
diff --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -256,8 +256,6 @@
 ; CHECK-NEXT:    vldr.16 s0, [r2]
 ; CHECK-NEXT:    vldr.16 s2, [r1]
 ; CHECK-NEXT:    vadd.f16 s0, s2, s0
-; CHECK-NEXT:    vmov.f16 r1, s0
-; CHECK-NEXT:    vmov.f16 s0, r1
 ; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    bx lr
 entry: