Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -350,6 +350,13 @@ SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + bool SimplifyDemandedBitsForTargetNode(SDValue Op, + const APInt &OriginalDemandedBits, + const APInt &OriginalDemandedElts, + KnownBits &Known, + TargetLoweringOpt &TLO, + unsigned Depth) const override; + bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; /// allowsMisalignedMemoryAccesses - Returns true if the target allows Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14087,16 +14087,18 @@ SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); - // Turn X << -C -> X >> C and viceversa. The negative shifts can come up from - // uses of the intrinsics. if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) { int ShiftAmt = C->getSExtValue(); + + // Shift by 0 is just the inputs if (ShiftAmt == 0) { SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL); DAG.ReplaceAllUsesWith(N, Merge.getNode()); return SDValue(); } + // Turn X << -C -> X >> C and viceversa. The negative shifts can come up + // from uses of the intrinsics. if (ShiftAmt >= -32 && ShiftAmt < 0) { unsigned NewOpcode = N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL; @@ -14105,6 +14107,70 @@ DAG.ReplaceAllUsesWith(N, NewShift.getNode()); return NewShift; } + + // If we do not use part of the result, turn the other half into a smaller + // shift or a mov, depending on the constant. 
+ if (!N->hasAnyUseOfValue(0)) { + if (N->getOpcode() == ARMISD::LSRL || N->getOpcode() == ARMISD::ASRL) { + if (ShiftAmt >= 0 && ShiftAmt < 32) { + unsigned NewOpcode = + N->getOpcode() == ARMISD::ASRL ? ISD::SRA : ISD::SRL; + SDValue NewShift = DAG.getNode(NewOpcode, DL, MVT::i32, Op1, + DAG.getConstant(ShiftAmt, DL, MVT::i32)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewShift); + return SDValue(); + } else if (ShiftAmt > 32 && ShiftAmt < 64) { + if (N->getOpcode() == ARMISD::ASRL) { + SDValue NewShift = DAG.getNode(ISD::SRA, DL, MVT::i32, Op1, + DAG.getConstant(31, DL, MVT::i32)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewShift); + return SDValue(); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), + DAG.getConstant(0, DL, MVT::i32)); + return SDValue(); + } + } + } + else { + if (ShiftAmt == 32) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Op0); + return SDValue(); + } else if (ShiftAmt > 32 && ShiftAmt < 64) { + SDValue NewShift = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, + DAG.getConstant(ShiftAmt - 32, DL, MVT::i32)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewShift); + return SDValue(); + } + } + } + if (!N->hasAnyUseOfValue(1)) { + if (N->getOpcode() == ARMISD::LSLL) { + if (ShiftAmt >= 0 && ShiftAmt < 32) { + SDValue NewShift = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, + DAG.getConstant(ShiftAmt, DL, MVT::i32)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewShift); + return SDValue(); + } + else if (ShiftAmt >= 32 && ShiftAmt < 64) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), + DAG.getConstant(0, DL, MVT::i32)); + return SDValue(); + } + } else { + if (ShiftAmt == 32) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Op1); + return SDValue(); + } else if (ShiftAmt > 32 && ShiftAmt < 64) { + unsigned NewOpcode = + N->getOpcode() == ARMISD::ASRL ? 
ISD::SRA : ISD::SRL; + SDValue NewShift = DAG.getNode(NewOpcode, DL, MVT::i32, Op1, + DAG.getConstant(ShiftAmt - 32, DL, MVT::i32)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewShift); + return SDValue(); + } + } + } } return SDValue(); @@ -16167,6 +16233,35 @@ return false; } +bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode( + SDValue Op, const APInt &OriginalDemandedBits, + const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, + unsigned Depth) const { + unsigned Opc = Op.getOpcode(); + + switch (Opc) { + case ARMISD::ASRL: + case ARMISD::LSRL: { + // If this is result 0 and the other result is unused, see if the demand + // bits allow us to shrink this long shift into a standard small shift in + // the opposite direction. + if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) && + isa<ConstantSDNode>(Op->getOperand(2))) { + unsigned ShAmt = Op->getConstantOperandVal(2); + if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf( + APInt::getAllOnesValue(32) << (32 - ShAmt))) + return TLO.CombineTo( + Op, TLO.DAG.getNode( + ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1), + TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32))); + } + break; + } + } + + return TargetLowering::SimplifyDemandedBitsForTargetNode( + Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth); +} //===----------------------------------------------------------------------===// // ARM Inline Assembly Support Index: llvm/test/CodeGen/Thumb2/fir.ll =================================================================== --- llvm/test/CodeGen/Thumb2/fir.ll +++ llvm/test/CodeGen/Thumb2/fir.ll @@ -3,27 +3,15 @@ ; RUN: llc --verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi -mattr=+dsp %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-NOMVE define void @test1(i32* %p0, i32 *%p1, i32 *%p2, i32 *%pDst) { -; CHECK-MVE-LABEL: test1: -; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: ldr r1, [r1] -; CHECK-MVE-NEXT: ldr r2, [r2] -; CHECK-MVE-NEXT: ldr r0, [r0] -; 
CHECK-MVE-NEXT: smull r2, r1, r2, r1 -; CHECK-MVE-NEXT: lsrl r2, r1, #31 -; CHECK-MVE-NEXT: bic r1, r2, #1 -; CHECK-MVE-NEXT: add r0, r1 -; CHECK-MVE-NEXT: str r0, [r3] -; CHECK-MVE-NEXT: bx lr -; -; CHECK-NOMVE-LABEL: test1: -; CHECK-NOMVE: @ %bb.0: @ %entry -; CHECK-NOMVE-NEXT: ldr r1, [r1] -; CHECK-NOMVE-NEXT: ldr r2, [r2] -; CHECK-NOMVE-NEXT: ldr r0, [r0] -; CHECK-NOMVE-NEXT: smmul r1, r2, r1 -; CHECK-NOMVE-NEXT: add.w r0, r0, r1, lsl #1 -; CHECK-NOMVE-NEXT: str r0, [r3] -; CHECK-NOMVE-NEXT: bx lr +; CHECK-LABEL: test1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: ldr r1, [r1] +; CHECK-NEXT: ldr r2, [r2] +; CHECK-NEXT: ldr r0, [r0] +; CHECK-NEXT: smmul r1, r2, r1 +; CHECK-NEXT: add.w r0, r0, r1, lsl #1 +; CHECK-NEXT: str r0, [r3] +; CHECK-NEXT: bx lr entry: %l3 = load i32, i32* %p0, align 4 %l4 = load i32, i32* %p1, align 4 Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-demand.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-demand.ll +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/longshift-demand.ll @@ -27,7 +27,7 @@ define i32 @lsll_demand_bottom3(i64 %X) { ; CHECK-LABEL: lsll_demand_bottom3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #3 +; CHECK-NEXT: lsls r0, r0, #3 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -47,7 +47,7 @@ define i32 @ashr_demand_bottomm3(i64 %X) { ; CHECK-LABEL: ashr_demand_bottomm3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #3 +; CHECK-NEXT: lsls r0, r0, #3 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -108,7 +108,7 @@ define i32 @lsll_demand_bottom31(i64 %X) { ; CHECK-LABEL: lsll_demand_bottom31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #31 +; CHECK-NEXT: lsls r0, r0, #31 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -128,7 +128,7 @@ define i32 @ashr_demand_bottomm31(i64 %X) { ; CHECK-LABEL: ashr_demand_bottomm31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #31 +; CHECK-NEXT: lsls r0, r0, 
#31 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -169,7 +169,7 @@ define i32 @ashr_demand_bottom32(i64 %X) { ; CHECK-LABEL: ashr_demand_bottom32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: asrl r0, r1, #32 +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -189,7 +189,7 @@ define i32 @lsll_demand_bottom32(i64 %X) { ; CHECK-LABEL: lsll_demand_bottom32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #32 +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -209,7 +209,7 @@ define i32 @ashr_demand_bottomm32(i64 %X) { ; CHECK-LABEL: ashr_demand_bottomm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #32 +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -229,7 +229,7 @@ define i32 @lsll_demand_bottomm32(i64 %X) { ; CHECK-LABEL: lsll_demand_bottomm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsrl r0, r1, #32 +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -250,8 +250,7 @@ define i32 @ashr_demand_bottom44(i64 %X) { ; CHECK-LABEL: ashr_demand_bottom44: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #44 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrs r0, r1, #12 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -271,8 +270,7 @@ define i32 @lsll_demand_bottom44(i64 %X) { ; CHECK-LABEL: lsll_demand_bottom44: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #44 -; CHECK-NEXT: lsll r0, r1, r2 +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -340,8 +338,7 @@ define i32 @ashr_demand_top3(i64 %X) { ; CHECK-LABEL: ashr_demand_top3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: asrl r0, r1, #3 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: asrs r0, r1, #3 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -406,8 +403,7 @@ define i32 @lsll_demand_topm3(i64 %X) { ; CHECK-LABEL: lsll_demand_topm3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsrl r0, r1, #3 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: lsrs r0, r1, #3 ; CHECK-NEXT: bx 
lr entry: %0 = lshr i64 %X, 32 @@ -429,8 +425,7 @@ define i32 @ashr_demand_top31(i64 %X) { ; CHECK-LABEL: ashr_demand_top31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: asrl r0, r1, #31 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: asrs r0, r1, #31 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -495,8 +490,7 @@ define i32 @lsll_demand_topm31(i64 %X) { ; CHECK-LABEL: lsll_demand_topm31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsrl r0, r1, #31 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: lsrs r0, r1, #31 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -540,8 +534,6 @@ define i32 @lsll_demand_top32(i64 %X) { ; CHECK-LABEL: lsll_demand_top32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #32 -; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -562,8 +554,6 @@ define i32 @ashr_demand_topm32(i64 %X) { ; CHECK-LABEL: ashr_demand_topm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #32 -; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -607,9 +597,7 @@ define i32 @ashr_demand_top44(i64 %X) { ; CHECK-LABEL: ashr_demand_top44: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #44 -; CHECK-NEXT: asrl r0, r1, r2 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: asrs r0, r1, #31 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -630,9 +618,7 @@ define i32 @lsll_demand_top44(i64 %X) { ; CHECK-LABEL: lsll_demand_top44: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #44 -; CHECK-NEXT: lsll r0, r1, r2 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: lsls r0, r0, #12 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -723,8 +709,7 @@ define i32 @lsll_demand_bottommask3(i64 %X) { ; CHECK-LABEL: lsll_demand_bottommask3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #3 -; CHECK-NEXT: bic r0, r0, #1 +; CHECK-NEXT: lsls r0, r0, #3 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -745,8 +730,7 @@ define i32 @ashr_demand_bottommaskm3(i64 %X) { ; CHECK-LABEL: ashr_demand_bottommaskm3: ; CHECK: @ %bb.0: @ %entry -; 
CHECK-NEXT: lsll r0, r1, #3 -; CHECK-NEXT: bic r0, r0, #1 +; CHECK-NEXT: lsls r0, r0, #3 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -790,8 +774,7 @@ define i32 @ashr_demand_bottommask32(i64 %X) { ; CHECK-LABEL: ashr_demand_bottommask32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: asrl r0, r1, #32 -; CHECK-NEXT: bic r0, r0, #1 +; CHECK-NEXT: bic r0, r1, #1 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -812,8 +795,7 @@ define i32 @lsll_demand_bottommask32(i64 %X) { ; CHECK-LABEL: lsll_demand_bottommask32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #32 -; CHECK-NEXT: bic r0, r0, #1 +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -834,8 +816,7 @@ define i32 @ashr_demand_bottommaskm32(i64 %X) { ; CHECK-LABEL: ashr_demand_bottommaskm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsll r0, r1, #32 -; CHECK-NEXT: bic r0, r0, #1 +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 @@ -856,8 +837,7 @@ define i32 @lsll_demand_bottommaskm32(i64 %X) { ; CHECK-LABEL: lsll_demand_bottommaskm32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsrl r0, r1, #32 -; CHECK-NEXT: bic r0, r0, #1 +; CHECK-NEXT: bic r0, r1, #1 ; CHECK-NEXT: bx lr entry: %0 = lshr i64 %X, 32 Index: llvm/test/CodeGen/Thumb2/shift_parts.ll =================================================================== --- llvm/test/CodeGen/Thumb2/shift_parts.ll +++ llvm/test/CodeGen/Thumb2/shift_parts.ll @@ -422,16 +422,10 @@ define i32 @ashr_demand_bottommask(i64 %x) { -; CHECK-MVE-LABEL: ashr_demand_bottommask: -; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: lsrl r0, r1, #31 -; CHECK-MVE-NEXT: bic r0, r0, #1 -; CHECK-MVE-NEXT: bx lr -; -; CHECK-NON-MVE-LABEL: ashr_demand_bottommask: -; CHECK-NON-MVE: @ %bb.0: @ %entry -; CHECK-NON-MVE-NEXT: lsls r0, r1, #1 -; CHECK-NON-MVE-NEXT: bx lr +; CHECK-LABEL: ashr_demand_bottommask: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsls r0, r1, #1 +; CHECK-NEXT: bx lr entry: %shr = ashr i64 %x, 31 %t = trunc i64 
%shr to i32 @@ -440,16 +434,10 @@ } define i32 @lshr_demand_bottommask(i64 %x) { -; CHECK-MVE-LABEL: lshr_demand_bottommask: -; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: lsrl r0, r1, #31 -; CHECK-MVE-NEXT: bic r0, r0, #1 -; CHECK-MVE-NEXT: bx lr -; -; CHECK-NON-MVE-LABEL: lshr_demand_bottommask: -; CHECK-NON-MVE: @ %bb.0: @ %entry -; CHECK-NON-MVE-NEXT: lsls r0, r1, #1 -; CHECK-NON-MVE-NEXT: bx lr +; CHECK-LABEL: lshr_demand_bottommask: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsls r0, r1, #1 +; CHECK-NEXT: bx lr entry: %shr = lshr i64 %x, 31 %t = trunc i64 %shr to i32 @@ -470,17 +458,11 @@ } define i32 @ashr_demand_bottommask2(i64 %x) { -; CHECK-MVE-LABEL: ashr_demand_bottommask2: -; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: lsrl r0, r1, #31 -; CHECK-MVE-NEXT: bic r0, r0, #3 -; CHECK-MVE-NEXT: bx lr -; -; CHECK-NON-MVE-LABEL: ashr_demand_bottommask2: -; CHECK-NON-MVE: @ %bb.0: @ %entry -; CHECK-NON-MVE-NEXT: mvn r0, #2 -; CHECK-NON-MVE-NEXT: and.w r0, r0, r1, lsl #1 -; CHECK-NON-MVE-NEXT: bx lr +; CHECK-LABEL: ashr_demand_bottommask2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: mvn r0, #2 +; CHECK-NEXT: and.w r0, r0, r1, lsl #1 +; CHECK-NEXT: bx lr entry: %shr = ashr i64 %x, 31 %t = trunc i64 %shr to i32 @@ -489,17 +471,11 @@ } define i32 @lshr_demand_bottommask2(i64 %x) { -; CHECK-MVE-LABEL: lshr_demand_bottommask2: -; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: lsrl r0, r1, #31 -; CHECK-MVE-NEXT: bic r0, r0, #3 -; CHECK-MVE-NEXT: bx lr -; -; CHECK-NON-MVE-LABEL: lshr_demand_bottommask2: -; CHECK-NON-MVE: @ %bb.0: @ %entry -; CHECK-NON-MVE-NEXT: mvn r0, #2 -; CHECK-NON-MVE-NEXT: and.w r0, r0, r1, lsl #1 -; CHECK-NON-MVE-NEXT: bx lr +; CHECK-LABEL: lshr_demand_bottommask2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: mvn r0, #2 +; CHECK-NEXT: and.w r0, r0, r1, lsl #1 +; CHECK-NEXT: bx lr entry: %shr = lshr i64 %x, 31 %t = trunc i64 %shr to i32