Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -12982,13 +12982,18 @@
 }
 static SDValue PerformVMOVhrCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue Op0 = N->getOperand(0);
+
+  // VMOVhr (VMOVrh (X)) -> X
+  if (Op0->getOpcode() == ARMISD::VMOVrh)
+    return Op0->getOperand(0);
+
   // FullFP16: half values are passed in S-registers, and we don't
   // need any of the bitcast and moves:
   //
   // t2: f32,ch = CopyFromReg t0, Register:f32 %0
   //   t5: i32 = bitcast t2
   // t18: f16 = ARMISD::VMOVhr t5
-  SDValue Op0 = N->getOperand(0);
   if (Op0->getOpcode() == ISD::BITCAST) {
     SDValue Copy = Op0->getOperand(0);
     if (Copy.getValueType() == MVT::f32 &&
@@ -13014,6 +13019,10 @@
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
+  // VMOVrh (VMOVhr (X)) -> X
+  if (N0->getOpcode() == ARMISD::VMOVhr)
+    return N0->getOperand(0);
+
   // fold (VMOVrh (load x)) -> (zextload (i16*)x)
   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
Index: llvm/test/CodeGen/Thumb2/mve-vdup.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -256,8 +256,6 @@
 ; CHECK-NEXT:    vldr.16 s0, [r2]
 ; CHECK-NEXT:    vldr.16 s2, [r1]
 ; CHECK-NEXT:    vadd.f16 s0, s2, s0
-; CHECK-NEXT:    vmov.f16 r1, s0
-; CHECK-NEXT:    vmov.f16 s0, r1
 ; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    bx lr
 entry: