diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -86,16 +86,20 @@ bool CheckProfitability = true); bool SelectImmShifterOperand(SDValue N, SDValue &A, SDValue &B, bool CheckProfitability = true); - bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, - SDValue &B, SDValue &C) { + bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B, + SDValue &C) { // Don't apply the profitability check return SelectRegShifterOperand(N, A, B, C, false); } - bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, - SDValue &B) { + bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) { // Don't apply the profitability check return SelectImmShifterOperand(N, A, B, false); } + bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) { + if (!N.hasOneUse()) + return false; + return SelectImmShifterOperand(N, A, B, false); + } bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -63,6 +63,12 @@ let MIOperandInfo = (ops rGPR, i32imm); } +// Same as above, but only matching on a single use node. 
+def t2_so_reg_oneuse : Operand<i32>, + ComplexPattern<i32, 2, + "SelectShiftImmShifterOperandOneUse", + [shl,srl,sra,rotr]>; + // t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value def t2_so_imm_not_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), SDLoc(N), @@ -3377,8 +3383,8 @@ (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>; def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs), (t2CMPrr GPRnopc:$lhs, rGPR:$rhs)>; -def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs), - (t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>; +def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg_oneuse:$rhs), + (t2CMPrs GPRnopc:$lhs, t2_so_reg_oneuse:$rhs)>; let isCompare = 1, Defs = [CPSR] in { // shifted imm diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -9,9 +9,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: ldr r1, [sp, #8] ; CHECK-NEXT: mov r12, r3 -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: add.w lr, r1, #3 -; CHECK-NEXT: cmp.w r3, lr, lsr #2 +; CHECK-NEXT: adds r3, r1, #3 +; CHECK-NEXT: lsrs r3, r3, #2 ; CHECK-NEXT: beq .LBB0_3 ; CHECK-NEXT: @ %bb.1: @ %do.body.preheader ; CHECK-NEXT: dlstp.32 lr, r1 @@ -55,12 +54,11 @@ define void @nested(i32* nocapture readonly %x, i32* nocapture readnone %y, i32* nocapture %z, i32 %m, i32 %n) { ; CHECK-LABEL: nested: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cbz r3, .LBB1_8 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-NEXT: ldr r5, [sp, #28] -; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: ldr.w r12, [sp, #24] ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: b .LBB1_4 ; CHECK-NEXT: .LBB1_2: @ in Loop: Header=BB1_4 Depth=1 @@ -74,15 +72,15 @@ ; CHECK-NEXT: .LBB1_4: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; 
CHECK-NEXT: @ Child Loop BB1_6 Depth 2 -; CHECK-NEXT: adds r7, r5, #3 -; CHECK-NEXT: cmp.w r12, r7, lsr #2 +; CHECK-NEXT: add.w r6, r12, #3 +; CHECK-NEXT: lsrs r7, r6, #2 ; CHECK-NEXT: beq .LBB1_2 ; CHECK-NEXT: @ %bb.5: @ %do.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: bic r9, r7, #3 +; CHECK-NEXT: bic r5, r6, #3 ; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: add.w r8, r0, r9, lsl #2 -; CHECK-NEXT: dlstp.32 lr, r5 +; CHECK-NEXT: add.w r8, r0, r5, lsl #2 +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB1_6: @ %do.body ; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 @@ -91,11 +89,11 @@ ; CHECK-NEXT: letp lr, .LBB1_6 ; CHECK-NEXT: @ %bb.7: @ %if.end.loopexit ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: sub.w r5, r5, r9 +; CHECK-NEXT: sub.w r12, r12, r5 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: b .LBB1_3 ; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %cmp20.not = icmp eq i32 %m, 0 br i1 %cmp20.not, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1566,13 +1566,11 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: ldr.w lr, [sp, #16] -; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: cmp.w r4, lr, lsr #2 +; CHECK-NEXT: ldr r4, [sp, #16] +; CHECK-NEXT: lsrs r5, r4, #2 ; CHECK-NEXT: beq .LBB18_5 ; CHECK-NEXT: @ %bb.1: @ %do.body.preheader ; CHECK-NEXT: ldr.w r12, [sp, #20] -; CHECK-NEXT: lsr.w r5, lr, #2 ; CHECK-NEXT: .LBB18_2: @ %do.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB18_3 Depth 2 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll 
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -1064,76 +1064,73 @@ ; CHECK-NEXT: .pad #56 ; CHECK-NEXT: sub sp, #56 ; CHECK-NEXT: cmp r2, #8 -; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: vstr s0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: vstr s0, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: mov r1, r2 -; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: blo .LBB7_9 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: mov.w r10, #0 -; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: mov.w r12, #1 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: lsrs r1, r2, #2 ; CHECK-NEXT: b .LBB7_3 ; CHECK-NEXT: .LBB7_2: @ in Loop: Header=BB7_3 Depth=1 -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: add.w r10, r10, #1 -; CHECK-NEXT: lsls r3, r3, #2 +; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: add.w r11, r11, #1 +; CHECK-NEXT: lsl.w r12, r12, #2 ; CHECK-NEXT: cmp r2, #7 ; CHECK-NEXT: asr.w r1, r2, #2 ; CHECK-NEXT: ble .LBB7_9 ; CHECK-NEXT: .LBB7_3: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB7_6 Depth 2 ; CHECK-NEXT: @ Child Loop BB7_7 Depth 3 -; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: cmp.w r12, #1 +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: lsr.w r2, r1, #2 -; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: blt .LBB7_2 ; CHECK-NEXT: @ %bb.4: @ in Loop: Header=BB7_3 Depth=1 -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: cmp.w r2, r1, lsr #3 +; CHECK-NEXT: lsrs r2, r1, #3 +; CHECK-NEXT: str r2, 
[sp, #32] @ 4-byte Spill ; CHECK-NEXT: beq .LBB7_2 ; CHECK-NEXT: @ %bb.5: @ %.preheader ; CHECK-NEXT: @ in Loop: Header=BB7_3 Depth=1 -; CHECK-NEXT: lsrs r2, r1, #3 +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: lsls r1, r1, #1 -; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: movs r6, #0 -; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: lsl.w r11, r2, #1 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: lsl.w r10, r2, #1 ; CHECK-NEXT: .LBB7_6: @ Parent Loop BB7_3 Depth=1 ; CHECK-NEXT: @ => This Loop Header: Depth=2 ; CHECK-NEXT: @ Child Loop BB7_7 Depth 3 -; CHECK-NEXT: ldrd r3, lr, [r0, #24] -; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: ldrd r12, r2, [r0, #16] -; CHECK-NEXT: ldr.w r3, [r3, r10, lsl #2] -; CHECK-NEXT: muls r1, r6, r1 -; CHECK-NEXT: ldr.w r2, [r2, r10, lsl #2] +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldrd lr, r2, [r0, #16] +; CHECK-NEXT: ldrd r3, r8, [r0, #24] +; CHECK-NEXT: muls r1, r4, r1 +; CHECK-NEXT: ldr.w r2, [r2, r11, lsl #2] ; CHECK-NEXT: ldrd r7, r5, [r0, #32] +; CHECK-NEXT: ldr.w r3, [r3, r11, lsl #2] +; CHECK-NEXT: ldr.w r6, [lr, r11, lsl #2] +; CHECK-NEXT: add.w r7, r7, r2, lsl #2 +; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: add.w r5, r5, r3, lsl #2 -; CHECK-NEXT: ldr.w r4, [r12, r10, lsl #2] -; CHECK-NEXT: add.w r3, r7, r2, lsl #2 -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: add.w r2, r2, r1, lsl #2 -; CHECK-NEXT: add.w r12, lr, r4, lsl #2 -; CHECK-NEXT: add.w r1, r2, r11, lsl #2 -; CHECK-NEXT: dls lr, r7 -; CHECK-NEXT: add.w r8, r1, r11, lsl #2 -; CHECK-NEXT: add.w r9, r8, r11, lsl #2 +; CHECK-NEXT: add.w r1, r2, r1, lsl #2 +; CHECK-NEXT: add.w r3, r8, r6, lsl #2 +; CHECK-NEXT: ldr r6, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: add.w 
r2, r1, r10, lsl #2 +; CHECK-NEXT: add.w r8, r2, r10, lsl #2 +; CHECK-NEXT: add.w r9, r8, r10, lsl #2 +; CHECK-NEXT: dls lr, r6 ; CHECK-NEXT: .LBB7_7: @ Parent Loop BB7_3 Depth=1 ; CHECK-NEXT: @ Parent Loop BB7_6 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 ; CHECK-NEXT: vldrw.u32 q3, [r9] -; CHECK-NEXT: vldrw.u32 q4, [r1] +; CHECK-NEXT: vldrw.u32 q4, [r2] ; CHECK-NEXT: vldrw.u32 q6, [r8] -; CHECK-NEXT: vldrw.u32 q7, [r2] +; CHECK-NEXT: vldrw.u32 q7, [r1] ; CHECK-NEXT: vsub.f32 q5, q4, q3 ; CHECK-NEXT: vsub.f32 q0, q7, q6 ; CHECK-NEXT: vcadd.f32 q1, q0, q5, #270 @@ -1142,12 +1139,12 @@ ; CHECK-NEXT: vadd.f32 q3, q6, q7 ; CHECK-NEXT: vsub.f32 q4, q3, q0 ; CHECK-NEXT: vadd.f32 q0, q3, q0 -; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: vldrw.u32 q0, [r3], #16 +; CHECK-NEXT: vstrb.8 q0, [r1], #16 +; CHECK-NEXT: vldrw.u32 q0, [r7], #16 ; CHECK-NEXT: vcmul.f32 q3, q0, q4, #0 ; CHECK-NEXT: vcmla.f32 q3, q0, q4, #90 -; CHECK-NEXT: vstrb.8 q3, [r1], #16 -; CHECK-NEXT: vldrw.u32 q0, [r12], #16 +; CHECK-NEXT: vstrb.8 q3, [r2], #16 +; CHECK-NEXT: vldrw.u32 q0, [r3], #16 ; CHECK-NEXT: vcmul.f32 q3, q0, q2, #0 ; CHECK-NEXT: vcmla.f32 q3, q0, q2, #90 ; CHECK-NEXT: vstrb.8 q3, [r8], #16 @@ -1157,20 +1154,19 @@ ; CHECK-NEXT: vstrb.8 q2, [r9], #16 ; CHECK-NEXT: le lr, .LBB7_7 ; CHECK-NEXT: @ %bb.8: @ in Loop: Header=BB7_6 Depth=2 -; CHECK-NEXT: ldr r3, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: adds r6, #1 -; CHECK-NEXT: cmp r6, r3 +; CHECK-NEXT: adds r4, #1 +; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: bne .LBB7_6 ; CHECK-NEXT: b .LBB7_2 ; CHECK-NEXT: .LBB7_9: -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: lsrs r0, r0, #3 ; CHECK-NEXT: wls lr, r0, .LBB7_12 ; CHECK-NEXT: @ %bb.10: ; CHECK-NEXT: adr r0, .LCPI7_0 -; CHECK-NEXT: vldr s0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vldr s0, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr 
r0, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: vadd.i32 q1, q1, r0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vldrw.u32 q2, [q1, #64]! diff --git a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll --- a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll @@ -11,8 +11,7 @@ ; CHECK-NEXT: .pad #24 ; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: mul r12, r3, r2 -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: cmp.w r2, r12, lsr #2 +; CHECK-NEXT: lsrs.w r2, r12, #2 ; CHECK-NEXT: beq.w .LBB0_3 ; CHECK-NEXT: @ %bb.1: @ %vector.ph ; CHECK-NEXT: mvn r3, #7