diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14457,6 +14457,25 @@
   return SDValue();
 }
 
+static SDValue PerformVQMOVNCombine(SDNode *N,
+                                    TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  unsigned IsTop = N->getConstantOperandVal(2);
+
+  unsigned NumElts = N->getValueType(0).getVectorNumElements();
+  APInt Op0DemandedElts =
+      APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
+                                     : APInt::getHighBitsSet(2, 1));
+
+  APInt KnownUndef, KnownZero;
+  const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+  if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef,
+                                     KnownZero, DCI))
+    return SDValue(N, 0);
+  return SDValue();
+}
+
 static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
   SDLoc DL(N);
   SDValue Op0 = N->getOperand(0);
@@ -15593,6 +15612,9 @@
     return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
   case ARMISD::VMOVN:
     return PerformVMOVNCombine(N, DCI);
+  case ARMISD::VQMOVNs:
+  case ARMISD::VQMOVNu:
+    return PerformVQMOVNCombine(N, DCI);
   case ARMISD::ASRL:
   case ARMISD::LSRL:
   case ARMISD::LSLL:
diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
--- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -1416,7 +1416,6 @@
 ; CHECK-NEXT:    vmullt.s16 q2, q1, q0
 ; CHECK-NEXT:    vmullb.s16 q0, q1, q0
 ; CHECK-NEXT:    vqshrnb.s32 q0, q0, #15
-; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    vqshrnt.s32 q0, q2, #15
 ; CHECK-NEXT:    vstrb.8 q0, [r2], #16
 ; CHECK-NEXT:    le lr, .LBB7_4
@@ -1856,7 +1855,6 @@
 ; CHECK-NEXT:    vmullt.s16 q7, q6, q5
 ; CHECK-NEXT:    vmullb.s16 q5, q6, q5
 ; CHECK-NEXT:    vqshrnb.s32 q5, q5, #15
-; CHECK-NEXT:    vmovlb.s16 q5, q5
 ; CHECK-NEXT:    vqshrnt.s32 q5, q7, #15
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vstrht.16 q5, [r2], #16
@@ -2645,7 +2643,6 @@
 ; CHECK-NEXT:    vmullt.s8 q2, q1, q0
 ; CHECK-NEXT:    vmullb.s8 q0, q1, q0
 ; CHECK-NEXT:    vqshrnb.s16 q0, q0, #7
-; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    vqshrnt.s16 q0, q2, #7
 ; CHECK-NEXT:    vstrb.8 q0, [r2], #16
 ; CHECK-NEXT:    le lr, .LBB16_4
@@ -3310,7 +3307,6 @@
 ; CHECK-NEXT:    vmullt.s8 q5, q4, q0
 ; CHECK-NEXT:    vmullb.s8 q0, q4, q0
 ; CHECK-NEXT:    vqshrnb.s16 q0, q0, #7
-; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    vqshrnt.s16 q0, q5, #7
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vstrbt.8 q0, [r2], #16
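
Reviewer note, not part of the patch: the combine exploits the merge semantics of the MVE narrowing moves. A VQMOVN with IsTop set writes the narrowed Op1 values into the odd ("top") lanes of the result and passes the even lanes of Op0 through unchanged, so only every other lane of Op0 is demanded; SimplifyDemandedVectorElts can then fold away instructions whose only effect is on the overwritten lanes, which is why the four vmovlb instructions disappear from mve-satmul-loops.ll. The standalone sketch below is illustrative only (the file and printed labels are hypothetical, assuming an LLVM development tree linked against LLVMSupport); it shows how splatting a 2-bit pattern across the lane count produces the alternating demanded-elements masks used above.

  // demanded_mask_sketch.cpp -- hypothetical demo, not part of the patch.
  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  int main() {
    // A v8i16 VQMOVN result has 8 lanes of Op0 to reason about.
    unsigned NumElts = 8;

    // IsTop: narrowed values land in the odd lanes, so only the even lanes
    // of Op0 are demanded. Splatting 0b01 across 8 bits yields 0x55.
    APInt TopMask = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));

    // Bottom: the even lanes are overwritten, so only the odd lanes of Op0
    // are demanded. Splatting 0b10 across 8 bits yields 0xaa.
    APInt BottomMask = APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));

    outs() << "IsTop demanded elts:  "
           << format_hex(TopMask.getZExtValue(), 4) << "\n";
    outs() << "bottom demanded elts: "
           << format_hex(BottomMask.getZExtValue(), 4) << "\n";
    return 0;
  }

In the vqshrnb/vqshrnt pairs above, the vmovlb sits between the two narrowing shifts; once the VQMOVNt only demands the even lanes of its first operand, the widening move contributes nothing to those lanes and is removed.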