diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -584,6 +584,8 @@ bool preferZeroCompareBranch() const override { return true; } + bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; + bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -21191,6 +21191,30 @@ return Subtarget->hasV6T2Ops(); }
+/// Report whether sinking an `and` with a constant mask next to its
+/// compare-against-zero is worthwhile for this subtarget.
+///
+/// Returns true only on v7+ cores when operand 1 of \p AndI is a constant
+/// integer of at most 32 bits that is encodable as a modified immediate
+/// (Thumb2 or ARM encoding, depending on the subtarget mode).
+bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
+    const Instruction &AndI) const {
+  if (!Subtarget->hasV7Ops())
+    return false;
+
+  // Sink the `and` instruction only if the mask would fit into a modified
+  // immediate operand.
+  const auto *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+  if (!Mask || Mask->getValue().getBitWidth() > 32u)
+    return false;
+  // The width check above guarantees the zero-extended value fits in 32 bits.
+  const auto MaskVal = static_cast<unsigned>(Mask->getValue().getZExtValue());
+  // Both encoders return -1 when the value has no modified-immediate encoding.
+  const int Encoded = Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
+                                            : ARM_AM::getSOImmVal(MaskVal);
+  return Encoded != -1;
+}
+
 bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { return !Subtarget->hasMinSize() || Subtarget->isTargetWindows(); } diff --git a/llvm/test/CodeGen/ARM/and-cmp0-sink.ll b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll --- a/llvm/test/CodeGen/ARM/and-cmp0-sink.ll +++ b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll @@ -11,86 +11,99 @@ ; V7M-LABEL: f: ; V7M: @ %bb.0: @ %entry ; V7M-NEXT: movs r2, #0 -; V7M-NEXT: and r12, r0, #14 ; V7M-NEXT: str r2, [r1] -; V7M-NEXT: and r3, r0, #4 -; V7M-NEXT: and r2, r0, #2 -; V7M-NEXT: lsls r0, r0, #31 -; V7M-NEXT: bne .LBB0_2 +; V7M-NEXT: lsls r2, r0, #31 +; V7M-NEXT: bne .LBB0_3 ; V7M-NEXT: @ %bb.1: @ %if.then -; V7M-NEXT: cmp r2, #0 -; V7M-NEXT: it ne -; V7M-NEXT: movne.w r2, #33024 -; V7M-NEXT: cmp r3, #0 -; V7M-NEXT: it ne -; V7M-NEXT: addne.w r2, r2, #16512 -; V7M-NEXT: b .LBB0_3 -; V7M-NEXT: .LBB0_2: @ %if.else -; V7M-NEXT: cmp r2, #0 -; V7M-NEXT: it ne -; V7M-NEXT: movne.w r2, #8256 -; V7M-NEXT: cmp r3, #0 -; V7M-NEXT: it ne -; V7M-NEXT: addne.w r2, r2, #4128 +; V7M-NEXT: tst.w r0, #14 +; V7M-NEXT: beq .LBB0_6 +; V7M-NEXT: @ %bb.2: +; V7M-NEXT: lsls r2, r0, #30 +; V7M-NEXT: mov.w r3, #33024 +; V7M-NEXT: and.w r2, r3, r2, asr #31 +; V7M-NEXT: lsrs r0, r0, #2 +; V7M-NEXT: bfi r2, r0, #7, #1 +; V7M-NEXT: bfi r2, r0, #14, #1 +; V7M-NEXT: b .LBB0_5 ; V7M-NEXT: .LBB0_3: @ %if.else -; V7M-NEXT: cmp.w r12, #0 -; V7M-NEXT: it ne -; V7M-NEXT: strne r2, [r1] +; V7M-NEXT: tst.w r0, #14 +; V7M-NEXT: it eq +; V7M-NEXT: bxeq lr +; V7M-NEXT: .LBB0_4: +; V7M-NEXT: lsls r2, r0, #30 +; V7M-NEXT: mov.w r3, #8256 +; V7M-NEXT: and.w r2, r3, r2, asr #31 +; V7M-NEXT: lsrs r0, r0, #2 +; V7M-NEXT: bfi r2, r0, #5, #1 +; V7M-NEXT: bfi r2, r0, #12, #1 +; V7M-NEXT: .LBB0_5: @ %if.end +; V7M-NEXT: str r2, [r1] +; V7M-NEXT: .LBB0_6: @ %exit ; V7M-NEXT: bx lr ; ; V7A-LABEL: f: ; V7A: @ %bb.0: @ %entry ; V7A-NEXT: mov r2, #0 -; V7A-NEXT: and r12, r0, #14 -; V7A-NEXT: 
str r2, [r1] -; V7A-NEXT: and r3, r0, #4 -; V7A-NEXT: and r2, r0, #2 ; V7A-NEXT: tst r0, #1 -; V7A-NEXT: bne .LBB0_2 +; V7A-NEXT: str r2, [r1] +; V7A-NEXT: bne .LBB0_3 ; V7A-NEXT: @ %bb.1: @ %if.then -; V7A-NEXT: cmp r2, #0 -; V7A-NEXT: movw r0, #16512 -; V7A-NEXT: movwne r2, #33024 -; V7A-NEXT: b .LBB0_3 -; V7A-NEXT: .LBB0_2: @ %if.else -; V7A-NEXT: cmp r2, #0 -; V7A-NEXT: movw r0, #4128 -; V7A-NEXT: movwne r2, #8256 +; V7A-NEXT: tst r0, #14 +; V7A-NEXT: beq .LBB0_6 +; V7A-NEXT: @ %bb.2: +; V7A-NEXT: lsl r2, r0, #30 +; V7A-NEXT: mov r3, #33024 +; V7A-NEXT: and r2, r3, r2, asr #31 +; V7A-NEXT: lsr r0, r0, #2 +; V7A-NEXT: bfi r2, r0, #7, #1 +; V7A-NEXT: bfi r2, r0, #14, #1 +; V7A-NEXT: b .LBB0_5 ; V7A-NEXT: .LBB0_3: @ %if.else -; V7A-NEXT: cmp r3, #0 -; V7A-NEXT: orrne r2, r2, r0 -; V7A-NEXT: cmp r12, #0 -; V7A-NEXT: strne r2, [r1] +; V7A-NEXT: tst r0, #14 +; V7A-NEXT: bxeq lr +; V7A-NEXT: .LBB0_4: +; V7A-NEXT: lsl r2, r0, #30 +; V7A-NEXT: mov r3, #8256 +; V7A-NEXT: and r2, r3, r2, asr #31 +; V7A-NEXT: lsr r0, r0, #2 +; V7A-NEXT: bfi r2, r0, #5, #1 +; V7A-NEXT: bfi r2, r0, #12, #1 +; V7A-NEXT: .LBB0_5: @ %if.end +; V7A-NEXT: str r2, [r1] +; V7A-NEXT: .LBB0_6: @ %exit ; V7A-NEXT: bx lr ; ; V7A-T-LABEL: f: ; V7A-T: @ %bb.0: @ %entry ; V7A-T-NEXT: movs r2, #0 -; V7A-T-NEXT: and r12, r0, #14 ; V7A-T-NEXT: str r2, [r1] -; V7A-T-NEXT: and r3, r0, #4 -; V7A-T-NEXT: and r2, r0, #2 -; V7A-T-NEXT: lsls r0, r0, #31 -; V7A-T-NEXT: bne .LBB0_2 +; V7A-T-NEXT: lsls r2, r0, #31 +; V7A-T-NEXT: bne .LBB0_3 ; V7A-T-NEXT: @ %bb.1: @ %if.then -; V7A-T-NEXT: cmp r2, #0 -; V7A-T-NEXT: it ne -; V7A-T-NEXT: movne.w r2, #33024 -; V7A-T-NEXT: cmp r3, #0 -; V7A-T-NEXT: it ne -; V7A-T-NEXT: addne.w r2, r2, #16512 -; V7A-T-NEXT: b .LBB0_3 -; V7A-T-NEXT: .LBB0_2: @ %if.else -; V7A-T-NEXT: cmp r2, #0 -; V7A-T-NEXT: it ne -; V7A-T-NEXT: movne.w r2, #8256 -; V7A-T-NEXT: cmp r3, #0 -; V7A-T-NEXT: it ne -; V7A-T-NEXT: addne.w r2, r2, #4128 +; V7A-T-NEXT: tst.w r0, #14 +; V7A-T-NEXT: beq .LBB0_6 +; 
V7A-T-NEXT: @ %bb.2: +; V7A-T-NEXT: lsls r2, r0, #30 +; V7A-T-NEXT: mov.w r3, #33024 +; V7A-T-NEXT: and.w r2, r3, r2, asr #31 +; V7A-T-NEXT: lsrs r0, r0, #2 +; V7A-T-NEXT: bfi r2, r0, #7, #1 +; V7A-T-NEXT: bfi r2, r0, #14, #1 +; V7A-T-NEXT: b .LBB0_5 ; V7A-T-NEXT: .LBB0_3: @ %if.else -; V7A-T-NEXT: cmp.w r12, #0 -; V7A-T-NEXT: it ne -; V7A-T-NEXT: strne r2, [r1] +; V7A-T-NEXT: tst.w r0, #14 +; V7A-T-NEXT: it eq +; V7A-T-NEXT: bxeq lr +; V7A-T-NEXT: .LBB0_4: +; V7A-T-NEXT: lsls r2, r0, #30 +; V7A-T-NEXT: mov.w r3, #8256 +; V7A-T-NEXT: and.w r2, r3, r2, asr #31 +; V7A-T-NEXT: lsrs r0, r0, #2 +; V7A-T-NEXT: bfi r2, r0, #5, #1 +; V7A-T-NEXT: bfi r2, r0, #12, #1 +; V7A-T-NEXT: .LBB0_5: @ %if.end +; V7A-T-NEXT: str r2, [r1] +; V7A-T-NEXT: .LBB0_6: @ %exit ; V7A-T-NEXT: bx lr ; ; V6M-LABEL: f: @@ -176,17 +189,16 @@ define i32 @f0(i1 %c0, i32 %v) { ; V7M-LABEL: f0: ; V7M: @ %bb.0: @ %E -; V7M-NEXT: bic r1, r1, #-16843010 ; V7M-NEXT: lsls r0, r0, #31 ; V7M-NEXT: beq .LBB1_2 ; V7M-NEXT: @ %bb.1: @ %A -; V7M-NEXT: cmp r1, #0 +; V7M-NEXT: tst.w r1, #16843009 ; V7M-NEXT: itt eq ; V7M-NEXT: moveq r0, #0 ; V7M-NEXT: bxeq lr ; V7M-NEXT: b .LBB1_3 ; V7M-NEXT: .LBB1_2: @ %B -; V7M-NEXT: cmp r1, #0 +; V7M-NEXT: tst.w r1, #16843009 ; V7M-NEXT: itt ne ; V7M-NEXT: movne r0, #0 ; V7M-NEXT: bxne lr @@ -216,17 +228,16 @@ ; ; V7A-T-LABEL: f0: ; V7A-T: @ %bb.0: @ %E -; V7A-T-NEXT: bic r1, r1, #-16843010 ; V7A-T-NEXT: lsls r0, r0, #31 ; V7A-T-NEXT: beq .LBB1_2 ; V7A-T-NEXT: @ %bb.1: @ %A -; V7A-T-NEXT: cmp r1, #0 +; V7A-T-NEXT: tst.w r1, #16843009 ; V7A-T-NEXT: itt eq ; V7A-T-NEXT: moveq r0, #0 ; V7A-T-NEXT: bxeq lr ; V7A-T-NEXT: b .LBB1_3 ; V7A-T-NEXT: .LBB1_2: @ %B -; V7A-T-NEXT: cmp r1, #0 +; V7A-T-NEXT: tst.w r1, #16843009 ; V7A-T-NEXT: itt ne ; V7A-T-NEXT: movne r0, #0 ; V7A-T-NEXT: bxne lr @@ -286,17 +297,16 @@ define i32 @f1(i1 %c0, i32 %v) { ; V7M-LABEL: f1: ; V7M: @ %bb.0: @ %E -; V7M-NEXT: and r1, r1, #100663296 ; V7M-NEXT: lsls r0, r0, #31 ; V7M-NEXT: beq .LBB2_2 ; V7M-NEXT: @ 
%bb.1: @ %A -; V7M-NEXT: cmp r1, #0 +; V7M-NEXT: tst.w r1, #100663296 ; V7M-NEXT: itt eq ; V7M-NEXT: moveq r0, #0 ; V7M-NEXT: bxeq lr ; V7M-NEXT: b .LBB2_3 ; V7M-NEXT: .LBB2_2: @ %B -; V7M-NEXT: cmp r1, #0 +; V7M-NEXT: tst.w r1, #100663296 ; V7M-NEXT: itt ne ; V7M-NEXT: movne r0, #0 ; V7M-NEXT: bxne lr @@ -306,11 +316,10 @@ ; ; V7A-LABEL: f1: ; V7A: @ %bb.0: @ %E -; V7A-NEXT: and r1, r1, #100663296 ; V7A-NEXT: tst r0, #1 ; V7A-NEXT: beq .LBB2_3 ; V7A-NEXT: @ %bb.1: @ %A -; V7A-NEXT: cmp r1, #0 +; V7A-NEXT: tst r1, #100663296 ; V7A-NEXT: moveq r0, #0 ; V7A-NEXT: bxeq lr ; V7A-NEXT: .LBB2_2: @ %D @@ -318,23 +327,22 @@ ; V7A-NEXT: bx lr ; V7A-NEXT: .LBB2_3: @ %B ; V7A-NEXT: mov r0, #0 -; V7A-NEXT: cmp r1, #0 +; V7A-NEXT: tst r1, #100663296 ; V7A-NEXT: moveq r0, #1 ; V7A-NEXT: bx lr ; ; V7A-T-LABEL: f1: ; V7A-T: @ %bb.0: @ %E -; V7A-T-NEXT: and r1, r1, #100663296 ; V7A-T-NEXT: lsls r0, r0, #31 ; V7A-T-NEXT: beq .LBB2_2 ; V7A-T-NEXT: @ %bb.1: @ %A -; V7A-T-NEXT: cmp r1, #0 +; V7A-T-NEXT: tst.w r1, #100663296 ; V7A-T-NEXT: itt eq ; V7A-T-NEXT: moveq r0, #0 ; V7A-T-NEXT: bxeq lr ; V7A-T-NEXT: b .LBB2_3 ; V7A-T-NEXT: .LBB2_2: @ %B -; V7A-T-NEXT: cmp r1, #0 +; V7A-T-NEXT: tst.w r1, #100663296 ; V7A-T-NEXT: itt ne ; V7A-T-NEXT: movne r0, #0 ; V7A-T-NEXT: bxne lr diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1426,12 +1426,11 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: ldrd r12, r6, [r0, #4] ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: ldrb r0, [r0] -; CHECK-NEXT: and r8, r3, #1 +; CHECK-NEXT: ldrd r12, r6, [r0, #4] +; CHECK-NEXT: ldrb.w r9, [r0] ; CHECK-NEXT: vldr.16 s0, .LCPI17_0 -; CHECK-NEXT: lsr.w r9, r3, #1 +; CHECK-NEXT: lsr.w r8, r3, #1 ; CHECK-NEXT: b .LBB17_3 ; CHECK-NEXT: .LBB17_1: @ %if.else ; CHECK-NEXT: @ in 
Loop: Header=BB17_3 Depth=1 @@ -1441,7 +1440,7 @@ ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 ; CHECK-NEXT: vstr.16 s5, [r12, #2] ; CHECK-NEXT: adds r6, #10 -; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: subs.w r9, r9, #1 ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: beq .LBB17_8 @@ -1458,7 +1457,7 @@ ; CHECK-NEXT: vldrh.u16 q1, [r12] ; CHECK-NEXT: vmov.f32 s5, s1 ; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: wls lr, r9, .LBB17_6 +; CHECK-NEXT: wls lr, r8, .LBB17_6 ; CHECK-NEXT: @ %bb.4: @ %while.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 ; CHECK-NEXT: mov r5, r2 @@ -1466,7 +1465,7 @@ ; CHECK-NEXT: @ Parent Loop BB17_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldrh r7, [r1], #4 -; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vfma.f16 q1, q2, r7 ; CHECK-NEXT: ldrh r4, [r1, #-2] ; CHECK-NEXT: vmov.u16 r7, q1[0] @@ -1478,19 +1477,19 @@ ; CHECK-NEXT: strh r4, [r5, #2] ; CHECK-NEXT: vmov.f32 s4, s5 ; CHECK-NEXT: strh r7, [r5], #4 -; CHECK-NEXT: vmov.16 q1[2], r3 +; CHECK-NEXT: vmov.16 q1[2], r0 ; CHECK-NEXT: le lr, .LBB17_5 ; CHECK-NEXT: .LBB17_6: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 -; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: lsls r0, r3, #31 ; CHECK-NEXT: beq .LBB17_1 ; CHECK-NEXT: @ %bb.7: @ %if.then ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 -; CHECK-NEXT: ldrh r1, [r1] -; CHECK-NEXT: vfma.f16 q1, q2, r1 -; CHECK-NEXT: vmov.u16 r1, q1[0] -; CHECK-NEXT: vfma.f16 q1, q3, r1 -; CHECK-NEXT: strh r1, [r5] +; CHECK-NEXT: ldrh r0, [r1] +; CHECK-NEXT: vfma.f16 q1, q2, r0 +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vfma.f16 q1, q3, r0 +; CHECK-NEXT: strh r0, [r5] ; CHECK-NEXT: vmovx.f16 s2, s4 ; CHECK-NEXT: vstr.16 s2, [r12] ; CHECK-NEXT: b .LBB17_2 diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -2015,9 +2015,8 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: ldrd r12, r6, [r0, #4] -; CHECK-NEXT: and r8, r3, #1 +; CHECK-NEXT: lsr.w r8, r3, #1 ; CHECK-NEXT: ldrb r0, [r0] -; CHECK-NEXT: lsrs r3, r3, #1 ; CHECK-NEXT: vldr s0, .LCPI20_0 ; CHECK-NEXT: b .LBB20_3 ; CHECK-NEXT: .LBB20_1: @ %if.else @@ -2046,7 +2045,7 @@ ; CHECK-NEXT: vmov.f32 s6, s0 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: vmov.f32 s7, s0 -; CHECK-NEXT: wls lr, r3, .LBB20_6 +; CHECK-NEXT: wls lr, r8, .LBB20_6 ; CHECK-NEXT: @ %bb.4: @ %while.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 ; CHECK-NEXT: vmov q6, q1 @@ -2073,7 +2072,7 @@ ; CHECK-NEXT: le lr, .LBB20_5 ; CHECK-NEXT: .LBB20_6: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 -; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: lsls r7, r3, #31 ; CHECK-NEXT: beq .LBB20_1 ; CHECK-NEXT: @ %bb.7: @ %if.then ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1