Index: llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -4576,12 +4576,6 @@
   case Intrinsic::arm_mve_sqrshrl:
     SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
     return;
-  case Intrinsic::arm_mve_lsll:
-    SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false);
-    return;
-  case Intrinsic::arm_mve_asrl:
-    SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false);
-    return;
 
   case Intrinsic::arm_mve_vadc:
   case Intrinsic::arm_mve_vadc_predicated:
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -3799,6 +3799,12 @@
   case Intrinsic::arm_mve_vreinterpretq:
     return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
                        Op.getOperand(1));
+  case Intrinsic::arm_mve_lsll:
+    return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+  case Intrinsic::arm_mve_asrl:
+    return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
   }
 }
 
@@ -14076,6 +14082,34 @@
   return SDValue();
 }
 
+static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+
+  // Turn X << -C -> X >> C and viceversa. The negative shifts can come up from
+  // uses of the intrinsics.
+  if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
+    int ShiftAmt = C->getSExtValue();
+    if (ShiftAmt == 0) {
+      SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
+      DAG.ReplaceAllUsesWith(N, Merge.getNode());
+      return SDValue();
+    }
+
+    if (ShiftAmt > -32 && ShiftAmt < 0) {
+      unsigned NewOpcode =
+          N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
+      SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
+                                     DAG.getConstant(-ShiftAmt, DL, MVT::i32));
+      DAG.ReplaceAllUsesWith(N, NewShift.getNode());
+      return NewShift;
+    }
+  }
+
+  return SDValue();
+}
+
 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
@@ -14971,6 +15005,10 @@
     return PerformVCMPCombine(N, DCI, Subtarget);
   case ISD::VECREDUCE_ADD:
     return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
+  case ARMISD::ASRL:
+  case ARMISD::LSRL:
+  case ARMISD::LSLL:
+    return PerformLongShiftCombine(N, DCI.DAG);
   case ARMISD::SMULWB: {
     unsigned BitWidth = N->getValueType(0).getSizeInBits();
     APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
Index: llvm/test/CodeGen/Thumb2/longshift.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/longshift.ll
+++ llvm/test/CodeGen/Thumb2/longshift.ll
@@ -7,8 +7,6 @@
 define i64 @asrl_0(i64 %X) {
 ; CHECK-LABEL: asrl_0:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    asrl r0, r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = lshr i64 %X, 32
@@ -27,8 +25,7 @@
 define i64 @asrl_23(i64 %X) {
 ; CHECK-LABEL: asrl_23:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    movs r2, #23
-; CHECK-NEXT:    asrl r0, r1, r2
+; CHECK-NEXT:    asrl r0, r1, #23
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = lshr i64 %X, 32
@@ -87,8 +84,7 @@
 define i64 @asrl_m2(i64 %X) {
 ; CHECK-LABEL: asrl_m2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    mvn r2, #1
-; CHECK-NEXT:    asrl r0, r1, r2
+; CHECK-NEXT:    lsll r0, r1, #2
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = lshr i64 %X, 32
@@ -130,8 +126,6 @@
 define i64 @lsll_0(i64 %X) {
 ; CHECK-LABEL: lsll_0:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    lsll r0, r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = lshr i64 %X, 32
@@ -150,8 +144,7 @@
 define i64 @lsll_23(i64 %X) {
 ; CHECK-LABEL: lsll_23:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    movs r2, #23
-; CHECK-NEXT:    lsll r0, r1, r2
+; CHECK-NEXT:    lsll r0, r1, #23
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = lshr i64 %X, 32
@@ -210,8 +203,7 @@
 define i64 @lsll_m2(i64 %X) {
 ; CHECK-LABEL: lsll_m2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    mvn r2, #1
-; CHECK-NEXT:    lsll r0, r1, r2
+; CHECK-NEXT:    lsrl r0, r1, #2
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = lshr i64 %X, 32