diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -4681,12 +4681,6 @@ case Intrinsic::arm_mve_sqrshrl: SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); return; - case Intrinsic::arm_mve_lsll: - SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false); - return; - case Intrinsic::arm_mve_asrl: - SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false); - return; case Intrinsic::arm_mve_vadc: case Intrinsic::arm_mve_vadc_predicated: diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -3808,6 +3808,12 @@ case Intrinsic::arm_mve_vreinterpretq: return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(), Op.getOperand(1)); + case Intrinsic::arm_mve_lsll: + return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::arm_mve_asrl: + return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } } @@ -14138,6 +14144,40 @@ return SDValue(); } +/// PerformLongShiftCombine - Fold ARMISD::ASRL/LSRL/LSLL nodes whose shift +/// amount (operand 2) is a constant: a zero shift is replaced by its two +/// inputs, and a shift by -C with 1 <= C <= 32 becomes the opposite-direction +/// shift by C, which is returned. Every other path returns an empty SDValue. +static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from + // uses of the intrinsics. + if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) { + int ShiftAmt = C->getSExtValue(); + // A zero shift changes nothing: forward Op0/Op1 to the users of N. + if (ShiftAmt == 0) { + SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL); + DAG.ReplaceAllUsesWith(N, Merge.getNode()); + return SDValue(); + } + + // LSLL by -C becomes LSRL by C; ASRL/LSRL by -C become LSLL by C. + if (ShiftAmt >= -32 && ShiftAmt < 0) { + unsigned NewOpcode = + N->getOpcode() == ARMISD::LSLL ? 
ARMISD::LSRL : ARMISD::LSLL; + SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1, + DAG.getConstant(-ShiftAmt, DL, MVT::i32)); + DAG.ReplaceAllUsesWith(N, NewShift.getNode()); + return NewShift; + } + } + + return SDValue(); +} + /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); @@ -15033,6 +15073,10 @@ return PerformVCMPCombine(N, DCI, Subtarget); case ISD::VECREDUCE_ADD: return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget); + case ARMISD::ASRL: + case ARMISD::LSRL: + case ARMISD::LSLL: + return PerformLongShiftCombine(N, DCI.DAG); case ARMISD::SMULWB: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-const.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-const.ll --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-const.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-const.ll @@ -7,8 +7,6 @@ define i64 @asrl_0(i64 %X) { ; CHECK-LABEL: asrl_0: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: asrl r0, r1, r2 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -27,8 +25,7 @@ define i64 @asrl_23(i64 %X) { ; CHECK-LABEL: asrl_23: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #23 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrl r0, r1, #23 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -47,8 +44,7 @@ define i64 @asrl_32(i64 %X) { ; CHECK-LABEL: asrl_32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #32 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrl r0, r1, #32 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -127,8 +123,7 @@ define i64 @asrl_m2(i64 %X) { ; CHECK-LABEL: asrl_m2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #1 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #2 ; CHECK-NEXT: bx 
lr entry: %0 = lshr i64 %X, 32 @@ -147,8 +142,7 @@ define i64 @asrl_m32(i64 %X) { ; CHECK-LABEL: asrl_m32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #31 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #32 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -210,8 +204,6 @@ define i64 @lsll_0(i64 %X) { ; CHECK-LABEL: lsll_0: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: lsll r0, r1, r2 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -230,8 +222,7 @@ define i64 @lsll_23(i64 %X) { ; CHECK-LABEL: lsll_23: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #23 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #23 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -250,8 +241,7 @@ define i64 @lsll_32(i64 %X) { ; CHECK-LABEL: lsll_32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #32 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #32 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -330,8 +320,7 @@ define i64 @lsll_m2(i64 %X) { ; CHECK-LABEL: lsll_m2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #1 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsrl r0, r1, #2 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -350,8 +339,7 @@ define i64 @lsll_m32(i64 %X) { ; CHECK-LABEL: lsll_m32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #31 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsrl r0, r1, #32 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-demand.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-demand.ll --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-demand.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-demand.ll @@ -7,8 +7,7 @@ define i32 @ashr_demand_bottom3(i64 %X) { ; CHECK-LABEL: ashr_demand_bottom3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #3 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrl r0, r1, #3 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -28,8 +27,7 @@ define i32 
@lsll_demand_bottom3(i64 %X) { ; CHECK-LABEL: lsll_demand_bottom3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #3 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #3 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -49,8 +47,7 @@ define i32 @ashr_demand_bottomm3(i64 %X) { ; CHECK-LABEL: ashr_demand_bottomm3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #2 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #3 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -70,8 +67,7 @@ define i32 @lsll_demand_bottomm3(i64 %X) { ; CHECK-LABEL: lsll_demand_bottomm3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #2 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsrl r0, r1, #3 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -92,8 +88,7 @@ define i32 @ashr_demand_bottom31(i64 %X) { ; CHECK-LABEL: ashr_demand_bottom31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #31 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrl r0, r1, #31 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -113,8 +108,7 @@ define i32 @lsll_demand_bottom31(i64 %X) { ; CHECK-LABEL: lsll_demand_bottom31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #31 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #31 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -134,8 +128,7 @@ define i32 @ashr_demand_bottomm31(i64 %X) { ; CHECK-LABEL: ashr_demand_bottomm31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #30 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #31 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -155,8 +148,7 @@ define i32 @lsll_demand_bottomm31(i64 %X) { ; CHECK-LABEL: lsll_demand_bottomm31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #30 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsrl r0, r1, #31 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -177,8 +169,7 @@ define i32 @ashr_demand_bottom32(i64 %X) { ; CHECK-LABEL: ashr_demand_bottom32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #32 -; CHECK-NEXT: asrl r0, r1, 
r2 +; CHECK-NEXT: asrl r0, r1, #32 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -198,8 +189,7 @@ define i32 @lsll_demand_bottom32(i64 %X) { ; CHECK-LABEL: lsll_demand_bottom32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #32 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #32 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -219,8 +209,7 @@ define i32 @ashr_demand_bottomm32(i64 %X) { ; CHECK-LABEL: ashr_demand_bottomm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #31 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #32 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -240,8 +229,7 @@ define i32 @lsll_demand_bottomm32(i64 %X) { ; CHECK-LABEL: lsll_demand_bottomm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #31 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsrl r0, r1, #32 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -352,8 +340,7 @@ define i32 @ashr_demand_top3(i64 %X) { ; CHECK-LABEL: ashr_demand_top3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #3 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrl r0, r1, #3 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -375,8 +362,7 @@ define i32 @lsll_demand_top3(i64 %X) { ; CHECK-LABEL: lsll_demand_top3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #3 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #3 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -398,8 +384,7 @@ define i32 @ashr_demand_topm3(i64 %X) { ; CHECK-LABEL: ashr_demand_topm3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #2 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #3 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -421,8 +406,7 @@ define i32 @lsll_demand_topm3(i64 %X) { ; CHECK-LABEL: lsll_demand_topm3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #2 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsrl r0, r1, #3 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -445,8 +429,7 @@ define i32 @ashr_demand_top31(i64 %X) { ; CHECK-LABEL: 
ashr_demand_top31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #31 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrl r0, r1, #31 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -468,8 +451,7 @@ define i32 @lsll_demand_top31(i64 %X) { ; CHECK-LABEL: lsll_demand_top31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #31 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #31 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -491,8 +473,7 @@ define i32 @ashr_demand_topm31(i64 %X) { ; CHECK-LABEL: ashr_demand_topm31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #30 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #31 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -514,8 +495,7 @@ define i32 @lsll_demand_topm31(i64 %X) { ; CHECK-LABEL: lsll_demand_topm31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #30 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsrl r0, r1, #31 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -538,8 +518,7 @@ define i32 @ashr_demand_top32(i64 %X) { ; CHECK-LABEL: ashr_demand_top32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #32 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrl r0, r1, #32 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -561,8 +540,7 @@ define i32 @lsll_demand_top32(i64 %X) { ; CHECK-LABEL: lsll_demand_top32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #32 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #32 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -584,8 +562,7 @@ define i32 @ashr_demand_topm32(i64 %X) { ; CHECK-LABEL: ashr_demand_topm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #31 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #32 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -607,8 +584,7 @@ define i32 @lsll_demand_topm32(i64 %X) { ; CHECK-LABEL: lsll_demand_topm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #31 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsrl r0, r1, #32 ; 
CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: @@ -725,8 +701,7 @@ define i32 @ashr_demand_bottommask3(i64 %X) { ; CHECK-LABEL: ashr_demand_bottommask3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #3 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrl r0, r1, #3 ; CHECK-NEXT: bic r0, r0, #1 ; CHECK-NEXT: bx lr entry: @@ -748,8 +723,7 @@ define i32 @lsll_demand_bottommask3(i64 %X) { ; CHECK-LABEL: lsll_demand_bottommask3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #3 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #3 ; CHECK-NEXT: bic r0, r0, #1 ; CHECK-NEXT: bx lr entry: @@ -771,8 +745,7 @@ define i32 @ashr_demand_bottommaskm3(i64 %X) { ; CHECK-LABEL: ashr_demand_bottommaskm3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #2 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #3 ; CHECK-NEXT: bic r0, r0, #1 ; CHECK-NEXT: bx lr entry: @@ -794,8 +767,7 @@ define i32 @lsll_demand_bottommaskm3(i64 %X) { ; CHECK-LABEL: lsll_demand_bottommaskm3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #2 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsrl r0, r1, #3 ; CHECK-NEXT: bic r0, r0, #1 ; CHECK-NEXT: bx lr entry: @@ -818,8 +790,7 @@ define i32 @ashr_demand_bottommask32(i64 %X) { ; CHECK-LABEL: ashr_demand_bottommask32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #32 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrl r0, r1, #32 ; CHECK-NEXT: bic r0, r0, #1 ; CHECK-NEXT: bx lr entry: @@ -841,8 +812,7 @@ define i32 @lsll_demand_bottommask32(i64 %X) { ; CHECK-LABEL: lsll_demand_bottommask32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #32 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #32 ; CHECK-NEXT: bic r0, r0, #1 ; CHECK-NEXT: bx lr entry: @@ -864,8 +834,7 @@ define i32 @ashr_demand_bottommaskm32(i64 %X) { ; CHECK-LABEL: ashr_demand_bottommaskm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #31 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: lsll r0, r1, #32 ; CHECK-NEXT: bic r0, r0, #1 ; CHECK-NEXT: 
bx lr entry: @@ -887,8 +856,7 @@ define i32 @lsll_demand_bottommaskm32(i64 %X) { ; CHECK-LABEL: lsll_demand_bottommaskm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #31 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: lsrl r0, r1, #32 ; CHECK-NEXT: bic r0, r0, #1 ; CHECK-NEXT: bx lr entry: