diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -5938,14 +5938,15 @@ unsigned ShPartsOpc = ARMISD::LSLL; ConstantSDNode *Con = dyn_cast(ShAmt); - // If the shift amount is greater than 32 then do the default optimisation - if (Con && Con->getZExtValue() > 32) + // If the shift amount is at least 32 or has a greater bitwidth than 64 + // then do the default optimisation + if (ShAmt->getValueType(0).getSizeInBits() > 64 || + (Con && Con->getZExtValue() >= 32)) return SDValue(); - // Extract the lower 32 bits of the shift amount if it's an i64 - if (ShAmt->getValueType(0) == MVT::i64) - ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt, - DAG.getConstant(0, dl, MVT::i32)); + // Zero-extend or truncate the shift amount to i32 if it's not already one + if (ShAmt->getValueType(0) != MVT::i32) + ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32); if (ShOpc == ISD::SRL) { if (!Con) diff --git a/llvm/test/CodeGen/ARM/shift_parts.ll b/llvm/test/CodeGen/ARM/shift_parts.ll --- a/llvm/test/CodeGen/ARM/shift_parts.ll +++ b/llvm/test/CodeGen/ARM/shift_parts.ll @@ -52,7 +52,8 @@ define i64 @shift_left_imm_big2(i64 %x) { ; CHECK-MVE-LABEL: shift_left_imm_big2: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: lsll r0, r1, #32 +; CHECK-MVE-NEXT: mov r1, r0 +; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-NON-MVE-LABEL: shift_left_imm_big2: @@ -128,7 +129,8 @@ define i64 @shift_right_imm_big2(i64 %x) { ; CHECK-MVE-LABEL: shift_right_imm_big2: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: lsrl r0, r1, #32 +; CHECK-MVE-NEXT: mov r0, r1 +; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-NON-MVE-LABEL: shift_right_imm_big2: @@ -219,3 +221,62 @@ store i40 %bf.clear, i40* %0, align 1 ret void } + +%struct.a = type { i96 } + +define void @lsll_128bit_shift(%struct.a* nocapture %x) local_unnamed_addr #0 { +; 
CHECK-MVE-LABEL: lsll_128bit_shift: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: strd r1, r1, [r0] +; CHECK-MVE-NEXT: str r1, [r0, #8] +; CHECK-MVE-NEXT: bx lr +; +; CHECK-NON-MVE-LABEL: lsll_128bit_shift: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: movs r1, #0 +; CHECK-NON-MVE-NEXT: str r1, [r0] +; CHECK-NON-MVE-NEXT: str r1, [r0, #4] +; CHECK-NON-MVE-NEXT: str r1, [r0, #8] +; CHECK-NON-MVE-NEXT: bx lr +entry: + %0 = bitcast %struct.a* %x to i128* + %bf.load = load i128, i128* %0, align 8 + %bf.clear4 = and i128 %bf.load, -79228162514264337593543950336 + store i128 %bf.clear4, i128* %0, align 8 + ret void +} + +%struct.b = type { i184 } + +define void @lsll_256bit_shift(%struct.b* nocapture %x) local_unnamed_addr #0 { +; CHECK-MVE-LABEL: lsll_256bit_shift: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: str r1, [r0, #16] +; CHECK-MVE-NEXT: strd r1, r1, [r0, #8] +; CHECK-MVE-NEXT: strd r1, r1, [r0] +; CHECK-MVE-NEXT: ldrb r1, [r0, #23] +; CHECK-MVE-NEXT: lsls r1, r1, #24 +; CHECK-MVE-NEXT: str r1, [r0, #20] +; CHECK-MVE-NEXT: bx lr +; +; CHECK-NON-MVE-LABEL: lsll_256bit_shift: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: movs r1, #0 +; CHECK-NON-MVE-NEXT: str r1, [r0, #16] +; CHECK-NON-MVE-NEXT: str r1, [r0, #8] +; CHECK-NON-MVE-NEXT: str r1, [r0, #12] +; CHECK-NON-MVE-NEXT: str r1, [r0] +; CHECK-NON-MVE-NEXT: str r1, [r0, #4] +; CHECK-NON-MVE-NEXT: ldrb r1, [r0, #23] +; CHECK-NON-MVE-NEXT: lsls r1, r1, #24 +; CHECK-NON-MVE-NEXT: str r1, [r0, #20] +; CHECK-NON-MVE-NEXT: bx lr +entry: + %0 = bitcast %struct.b* %x to i192* + %bf.load = load i192, i192* %0, align 8 + %bf.clear4 = and i192 %bf.load, -24519928653854221733733552434404946937899825954937634816 + store i192 %bf.clear4, i192* %0, align 8 + ret void +} diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll --- a/llvm/test/CodeGen/Thumb2/mve-abs.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-abs.ll @@ -40,43 +40,39 @@ define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) { ; CHECK-LABEL: abs_v2i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vmov r12, s2 -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: vmov r3, s3 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: rsbs.w lr, r12, #0 -; CHECK-NEXT: sbc.w r5, r0, r3 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r2, lr -; CHECK-NEXT: lsrl r2, r5, #32 -; CHECK-NEXT: mov.w r5, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: vmov r4, s0 +; CHECK-NEXT: rsbs.w r3, r12, #0 +; CHECK-NEXT: sbc.w lr, r2, r0 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r5, #1 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movmi r1, #1 +; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq r2, r3 -; CHECK-NEXT: vmov r3, s1 -; CHECK-NEXT: rsbs r4, r1, #0 -; CHECK-NEXT: mov r6, r4 -; CHECK-NEXT: sbc.w r7, r0, r3 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: lsrl r6, r7, #32 -; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r0, #1 +; CHECK-NEXT: moveq lr, r0 +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: rsbs r5, r4, #0 +; CHECK-NEXT: sbc.w r6, r2, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ite eq -; CHECK-NEXT: moveq r6, r3 -; CHECK-NEXT: movne r1, r4 -; CHECK-NEXT: vmov.32 q0[0], r1 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq r6, r0 +; CHECK-NEXT: moveq r5, r4 +; CHECK-NEXT: vmov.32 q0[0], r5 ; CHECK-NEXT: vmov.32 q0[1], r6 +; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq lr, r12 -; CHECK-NEXT: vmov.32 q0[2], lr -; CHECK-NEXT: vmov.32 q0[3], r2 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: moveq r3, r12 +; CHECK-NEXT: vmov.32 q0[2], r3 +; CHECK-NEXT: vmov.32 q0[3], lr +; 
CHECK-NEXT: pop {r4, r5, r6, pc} entry: %0 = icmp slt <2 x i64> %s1, zeroinitializer %1 = sub nsw <2 x i64> zeroinitializer, %s1 diff --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll --- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll +++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll @@ -755,14 +755,12 @@ ; CHECK-NEXT: vmov r2, s18 ; CHECK-NEXT: vmov r3, s19 ; CHECK-NEXT: vmov.32 q4[0], r0 -; CHECK-NEXT: lsrl r0, r1, #32 -; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov.32 q4[1], r1 ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __aeabi_uldivmod ; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: lsrl r0, r1, #32 -; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov.32 q4[3], r1 ; CHECK-NEXT: vmov q0, q4 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} @@ -790,14 +788,12 @@ ; CHECK-NEXT: vmov r2, s18 ; CHECK-NEXT: vmov r3, s19 ; CHECK-NEXT: vmov.32 q4[0], r0 -; CHECK-NEXT: lsrl r0, r1, #32 -; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov.32 q4[1], r1 ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __aeabi_ldivmod ; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: lsrl r0, r1, #32 -; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov.32 q4[3], r1 ; CHECK-NEXT: vmov q0, q4 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll --- a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll @@ -382,14 +382,12 @@ ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: vmov.32 q4[0], r0 -; CHECK-NEXT: lsrl r0, r1, #32 -; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov.32 q4[1], r1 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: lsrl r0, r1, #32 -; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov.32 q4[3], r1 ; CHECK-NEXT: vmov q0, q4 ; CHECK-NEXT: vpop {d8, 
d9} ; CHECK-NEXT: pop {r7, pc} @@ -410,14 +408,12 @@ ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: vmov.32 q4[0], r0 -; CHECK-NEXT: lsrl r0, r1, #32 -; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: vmov.32 q4[1], r1 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vmov.32 q4[2], r0 -; CHECK-NEXT: lsrl r0, r1, #32 -; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov.32 q4[3], r1 ; CHECK-NEXT: vmov q0, q4 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r7, pc}