Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -584,6 +584,8 @@ bool preferZeroCompareBranch() const override { return true; } + bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; + bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -21191,6 +21191,31 @@ return Subtarget->hasV6T2Ops(); } +/// Return true when it is worthwhile to sink the mask `and` \p AndI so that it +/// can be folded with its compare-against-zero users. This holds only on v7+ +/// subtargets and only when operand 1 of \p AndI is a constant that the current +/// instruction set can encode as a modified immediate. +bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial( + const Instruction &AndI) const { + if (!Subtarget->hasV7Ops()) + return false; + + // Sink the `and` instruction only if the mask would fit into a modified + // immediate operand. + const auto *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); + // A mask with more than 32 significant bits can never be encoded. Checking + // the active bits first also keeps getZExtValue() from asserting on + // constants wider than 64 bits. + if (!Mask || Mask->getValue().getActiveBits() > 32) + return false; + + // ARM and Thumb2 use different modified-immediate encodings. + const auto MaskVal = static_cast<unsigned>(Mask->getValue().getZExtValue()); + const int Encoding = Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal) + : ARM_AM::getSOImmVal(MaskVal); + return Encoding != -1; +} + bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { return !Subtarget->hasMinSize() || Subtarget->isTargetWindows(); } Index: llvm/test/CodeGen/ARM/and-cmp0-sink.ll =================================================================== --- llvm/test/CodeGen/ARM/and-cmp0-sink.ll +++ llvm/test/CodeGen/ARM/and-cmp0-sink.ll @@ -10,86 +10,99 @@ ; V7M-LABEL: f: ; V7M: @ %bb.0: @ %entry ; V7M-NEXT: movs r2, #0 -; V7M-NEXT: and r12, r0, #14 ; V7M-NEXT: str r2, [r1] -; V7M-NEXT: and r3, r0, #4 -; V7M-NEXT: and r2, r0, #2 -; V7M-NEXT: lsls r0, r0, #31 -; V7M-NEXT: bne .LBB0_2 +; V7M-NEXT: lsls r2, r0, #31 +; V7M-NEXT: bne .LBB0_3 ; V7M-NEXT: @ %bb.1: @ %if.then -; V7M-NEXT: cmp r2, #0 -; V7M-NEXT: it ne -; V7M-NEXT: movne.w r2, #33024 -; V7M-NEXT: cmp r3, #0 
-; V7M-NEXT: it ne -; V7M-NEXT: addne.w r2, r2, #16512 -; V7M-NEXT: b .LBB0_3 -; V7M-NEXT: .LBB0_2: @ %if.else -; V7M-NEXT: cmp r2, #0 -; V7M-NEXT: it ne -; V7M-NEXT: movne.w r2, #8256 -; V7M-NEXT: cmp r3, #0 -; V7M-NEXT: it ne -; V7M-NEXT: addne.w r2, r2, #4128 +; V7M-NEXT: tst.w r0, #14 +; V7M-NEXT: beq .LBB0_6 +; V7M-NEXT: @ %bb.2: +; V7M-NEXT: lsls r2, r0, #30 +; V7M-NEXT: mov.w r3, #33024 +; V7M-NEXT: and.w r2, r3, r2, asr #31 +; V7M-NEXT: lsrs r0, r0, #2 +; V7M-NEXT: bfi r2, r0, #7, #1 +; V7M-NEXT: bfi r2, r0, #14, #1 +; V7M-NEXT: b .LBB0_5 ; V7M-NEXT: .LBB0_3: @ %if.else -; V7M-NEXT: cmp.w r12, #0 -; V7M-NEXT: it ne -; V7M-NEXT: strne r2, [r1] +; V7M-NEXT: tst.w r0, #14 +; V7M-NEXT: it eq +; V7M-NEXT: bxeq lr +; V7M-NEXT: .LBB0_4: +; V7M-NEXT: lsls r2, r0, #30 +; V7M-NEXT: mov.w r3, #8256 +; V7M-NEXT: and.w r2, r3, r2, asr #31 +; V7M-NEXT: lsrs r0, r0, #2 +; V7M-NEXT: bfi r2, r0, #5, #1 +; V7M-NEXT: bfi r2, r0, #12, #1 +; V7M-NEXT: .LBB0_5: @ %if.end +; V7M-NEXT: str r2, [r1] +; V7M-NEXT: .LBB0_6: @ %exit ; V7M-NEXT: bx lr ; ; V7A-LABEL: f: ; V7A: @ %bb.0: @ %entry ; V7A-NEXT: mov r2, #0 -; V7A-NEXT: and r12, r0, #14 -; V7A-NEXT: str r2, [r1] -; V7A-NEXT: and r3, r0, #4 -; V7A-NEXT: and r2, r0, #2 ; V7A-NEXT: tst r0, #1 -; V7A-NEXT: bne .LBB0_2 +; V7A-NEXT: str r2, [r1] +; V7A-NEXT: bne .LBB0_3 ; V7A-NEXT: @ %bb.1: @ %if.then -; V7A-NEXT: cmp r2, #0 -; V7A-NEXT: movw r0, #16512 -; V7A-NEXT: movwne r2, #33024 -; V7A-NEXT: b .LBB0_3 -; V7A-NEXT: .LBB0_2: @ %if.else -; V7A-NEXT: cmp r2, #0 -; V7A-NEXT: movw r0, #4128 -; V7A-NEXT: movwne r2, #8256 +; V7A-NEXT: tst r0, #14 +; V7A-NEXT: beq .LBB0_6 +; V7A-NEXT: @ %bb.2: +; V7A-NEXT: lsl r2, r0, #30 +; V7A-NEXT: mov r3, #33024 +; V7A-NEXT: and r2, r3, r2, asr #31 +; V7A-NEXT: lsr r0, r0, #2 +; V7A-NEXT: bfi r2, r0, #7, #1 +; V7A-NEXT: bfi r2, r0, #14, #1 +; V7A-NEXT: b .LBB0_5 ; V7A-NEXT: .LBB0_3: @ %if.else -; V7A-NEXT: cmp r3, #0 -; V7A-NEXT: orrne r2, r2, r0 -; V7A-NEXT: cmp r12, #0 -; V7A-NEXT: strne r2, [r1] 
+; V7A-NEXT: tst r0, #14 +; V7A-NEXT: bxeq lr +; V7A-NEXT: .LBB0_4: +; V7A-NEXT: lsl r2, r0, #30 +; V7A-NEXT: mov r3, #8256 +; V7A-NEXT: and r2, r3, r2, asr #31 +; V7A-NEXT: lsr r0, r0, #2 +; V7A-NEXT: bfi r2, r0, #5, #1 +; V7A-NEXT: bfi r2, r0, #12, #1 +; V7A-NEXT: .LBB0_5: @ %if.end +; V7A-NEXT: str r2, [r1] +; V7A-NEXT: .LBB0_6: @ %exit ; V7A-NEXT: bx lr ; ; V7A-T-LABEL: f: ; V7A-T: @ %bb.0: @ %entry ; V7A-T-NEXT: movs r2, #0 -; V7A-T-NEXT: and r12, r0, #14 ; V7A-T-NEXT: str r2, [r1] -; V7A-T-NEXT: and r3, r0, #4 -; V7A-T-NEXT: and r2, r0, #2 -; V7A-T-NEXT: lsls r0, r0, #31 -; V7A-T-NEXT: bne .LBB0_2 +; V7A-T-NEXT: lsls r2, r0, #31 +; V7A-T-NEXT: bne .LBB0_3 ; V7A-T-NEXT: @ %bb.1: @ %if.then -; V7A-T-NEXT: cmp r2, #0 -; V7A-T-NEXT: it ne -; V7A-T-NEXT: movne.w r2, #33024 -; V7A-T-NEXT: cmp r3, #0 -; V7A-T-NEXT: it ne -; V7A-T-NEXT: addne.w r2, r2, #16512 -; V7A-T-NEXT: b .LBB0_3 -; V7A-T-NEXT: .LBB0_2: @ %if.else -; V7A-T-NEXT: cmp r2, #0 -; V7A-T-NEXT: it ne -; V7A-T-NEXT: movne.w r2, #8256 -; V7A-T-NEXT: cmp r3, #0 -; V7A-T-NEXT: it ne -; V7A-T-NEXT: addne.w r2, r2, #4128 +; V7A-T-NEXT: tst.w r0, #14 +; V7A-T-NEXT: beq .LBB0_6 +; V7A-T-NEXT: @ %bb.2: +; V7A-T-NEXT: lsls r2, r0, #30 +; V7A-T-NEXT: mov.w r3, #33024 +; V7A-T-NEXT: and.w r2, r3, r2, asr #31 +; V7A-T-NEXT: lsrs r0, r0, #2 +; V7A-T-NEXT: bfi r2, r0, #7, #1 +; V7A-T-NEXT: bfi r2, r0, #14, #1 +; V7A-T-NEXT: b .LBB0_5 ; V7A-T-NEXT: .LBB0_3: @ %if.else -; V7A-T-NEXT: cmp.w r12, #0 -; V7A-T-NEXT: it ne -; V7A-T-NEXT: strne r2, [r1] +; V7A-T-NEXT: tst.w r0, #14 +; V7A-T-NEXT: it eq +; V7A-T-NEXT: bxeq lr +; V7A-T-NEXT: .LBB0_4: +; V7A-T-NEXT: lsls r2, r0, #30 +; V7A-T-NEXT: mov.w r3, #8256 +; V7A-T-NEXT: and.w r2, r3, r2, asr #31 +; V7A-T-NEXT: lsrs r0, r0, #2 +; V7A-T-NEXT: bfi r2, r0, #5, #1 +; V7A-T-NEXT: bfi r2, r0, #12, #1 +; V7A-T-NEXT: .LBB0_5: @ %if.end +; V7A-T-NEXT: str r2, [r1] +; V7A-T-NEXT: .LBB0_6: @ %exit ; V7A-T-NEXT: bx lr ; ; V6M-LABEL: f: Index: 
llvm/test/CodeGen/Thumb2/mve-float16regloops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1426,12 +1426,11 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: ldrd r12, r6, [r0, #4] ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: ldrb r0, [r0] -; CHECK-NEXT: and r8, r3, #1 +; CHECK-NEXT: ldrd r12, r6, [r0, #4] +; CHECK-NEXT: ldrb.w r9, [r0] ; CHECK-NEXT: vldr.16 s0, .LCPI17_0 -; CHECK-NEXT: lsr.w r9, r3, #1 +; CHECK-NEXT: lsr.w r8, r3, #1 ; CHECK-NEXT: b .LBB17_3 ; CHECK-NEXT: .LBB17_1: @ %if.else ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 @@ -1441,7 +1440,7 @@ ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 ; CHECK-NEXT: vstr.16 s5, [r12, #2] ; CHECK-NEXT: adds r6, #10 -; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: subs.w r9, r9, #1 ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: beq .LBB17_8 @@ -1458,7 +1457,7 @@ ; CHECK-NEXT: vldrh.u16 q1, [r12] ; CHECK-NEXT: vmov.f32 s5, s1 ; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: wls lr, r9, .LBB17_6 +; CHECK-NEXT: wls lr, r8, .LBB17_6 ; CHECK-NEXT: @ %bb.4: @ %while.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 ; CHECK-NEXT: mov r5, r2 @@ -1466,7 +1465,7 @@ ; CHECK-NEXT: @ Parent Loop BB17_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldrh r7, [r1], #4 -; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vfma.f16 q1, q2, r7 ; CHECK-NEXT: ldrh r4, [r1, #-2] ; CHECK-NEXT: vmov.u16 r7, q1[0] @@ -1478,19 +1477,19 @@ ; CHECK-NEXT: strh r4, [r5, #2] ; CHECK-NEXT: vmov.f32 s4, s5 ; CHECK-NEXT: strh r7, [r5], #4 -; CHECK-NEXT: vmov.16 q1[2], r3 +; CHECK-NEXT: vmov.16 q1[2], r0 ; CHECK-NEXT: le lr, .LBB17_5 ; CHECK-NEXT: .LBB17_6: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 -; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: lsls r0, r3, #31 ; 
CHECK-NEXT: beq .LBB17_1 ; CHECK-NEXT: @ %bb.7: @ %if.then ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 -; CHECK-NEXT: ldrh r1, [r1] -; CHECK-NEXT: vfma.f16 q1, q2, r1 -; CHECK-NEXT: vmov.u16 r1, q1[0] -; CHECK-NEXT: vfma.f16 q1, q3, r1 -; CHECK-NEXT: strh r1, [r5] +; CHECK-NEXT: ldrh r0, [r1] +; CHECK-NEXT: vfma.f16 q1, q2, r0 +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vfma.f16 q1, q3, r0 +; CHECK-NEXT: strh r0, [r5] ; CHECK-NEXT: vmovx.f16 s2, s4 ; CHECK-NEXT: vstr.16 s2, [r12] ; CHECK-NEXT: b .LBB17_2 Index: llvm/test/CodeGen/Thumb2/mve-float32regloops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -2015,9 +2015,8 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: ldrd r12, r6, [r0, #4] -; CHECK-NEXT: and r8, r3, #1 +; CHECK-NEXT: lsr.w r8, r3, #1 ; CHECK-NEXT: ldrb r0, [r0] -; CHECK-NEXT: lsrs r3, r3, #1 ; CHECK-NEXT: vldr s0, .LCPI20_0 ; CHECK-NEXT: b .LBB20_3 ; CHECK-NEXT: .LBB20_1: @ %if.else @@ -2046,7 +2045,7 @@ ; CHECK-NEXT: vmov.f32 s6, s0 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: vmov.f32 s7, s0 -; CHECK-NEXT: wls lr, r3, .LBB20_6 +; CHECK-NEXT: wls lr, r8, .LBB20_6 ; CHECK-NEXT: @ %bb.4: @ %while.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 ; CHECK-NEXT: vmov q6, q1 @@ -2073,7 +2072,7 @@ ; CHECK-NEXT: le lr, .LBB20_5 ; CHECK-NEXT: .LBB20_6: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 -; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: lsls r7, r3, #31 ; CHECK-NEXT: beq .LBB20_1 ; CHECK-NEXT: @ %bb.7: @ %if.then ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1