diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13844,6 +13844,13 @@ return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0)); } + // Only the bottom 16 bits of the source register are used. + if (Op.getValueType() == MVT::i32) { + APInt DemandedMask = APInt::getLowBitsSet(32, 16); + const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo(); + if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI)) + return SDValue(N, 0); + } return SDValue(); } diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll --- a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll @@ -139,10 +139,9 @@ ; CHECK-LE-NEXT: mov r4, sp ; CHECK-LE-NEXT: bfc r4, #0, #4 ; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: uxth r0, r0 ; CHECK-LE-NEXT: sub.w r4, r7, #8 -; CHECK-LE-NEXT: vmov.i32 q1, #0x0 ; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 ; CHECK-LE-NEXT: vpsel q0, q0, q1 ; CHECK-LE-NEXT: mov sp, r4 ; CHECK-LE-NEXT: pop {r4, r6, r7, pc} @@ -160,7 +159,6 @@ ; CHECK-BE-NEXT: mov sp, r4 ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: uxth r0, r0 ; CHECK-BE-NEXT: sub.w r4, r7, #8 ; CHECK-BE-NEXT: vrev32.8 q0, q0 ; CHECK-BE-NEXT: vmsr p0, r0 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll b/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll --- a/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll @@ -51,10 +51,8 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r6, r7, lr} ; CHECK-NEXT: push {r4, r6, r7, lr} -; CHECK-NEXT: uxth r2, r1 +; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: mvns r1, r1 -; CHECK-NEXT: vmsr p0, r2 -; CHECK-NEXT: uxth r1, r1 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vaddvt.s16 r12, q1 ; CHECK-NEXT: vaddvt.s16 r2, q0 @@ -92,7 +90,6 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: mvns r0, r0 ; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: bx lr @@ -109,7 +106,6 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: mvns r0, r0 ; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: bx lr @@ -126,7 +122,6 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: mvns r0, r0 ; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: bx lr