Index: llvm/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.h
+++ llvm/lib/Target/ARM/ARMISelLowering.h
@@ -350,6 +350,13 @@
     SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
+                                           const APInt &OriginalDemandedBits,
+                                           const APInt &OriginalDemandedElts,
+                                           KnownBits &Known,
+                                           TargetLoweringOpt &TLO,
+                                           unsigned Depth) const override;
+
     bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
 
     /// allowsMisalignedMemoryAccesses - Returns true if the target allows
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -16167,6 +16167,35 @@
   return false;
 }
 
+bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
+    SDValue Op, const APInt &OriginalDemandedBits,
+    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+    unsigned Depth) const {
+  unsigned Opc = Op.getOpcode();
+
+  switch (Opc) {
+  case ARMISD::ASRL:
+  case ARMISD::LSRL: {
+    // If this is result 0 and the other result is unused, see if the demand
+    // bits allow us to shrink this long shift into a standard small shift in
+    // the opposite direction.
+    if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
+        isa<ConstantSDNode>(Op->getOperand(2))) {
+      unsigned ShAmt = Op->getConstantOperandVal(2);
+      if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(
+                            APInt::getAllOnesValue(32) << (32 - ShAmt)))
+        return TLO.CombineTo(
+            Op, TLO.DAG.getNode(
+                    ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
+                    TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
+    }
+    break;
+  }
+  }
+
+  return TargetLowering::SimplifyDemandedBitsForTargetNode(
+      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
+}
 
 //===----------------------------------------------------------------------===//
 // ARM Inline Assembly Support
Index: llvm/test/CodeGen/Thumb2/fir.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/fir.ll
+++ llvm/test/CodeGen/Thumb2/fir.ll
@@ -3,27 +3,15 @@
 ; RUN: llc --verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi -mattr=+dsp %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-NOMVE
 
 define void @test1(i32* %p0, i32 *%p1, i32 *%p2, i32 *%pDst) {
-; CHECK-MVE-LABEL: test1:
-; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    ldr r1, [r1]
-; CHECK-MVE-NEXT:    ldr r2, [r2]
-; CHECK-MVE-NEXT:    ldr r0, [r0]
-; CHECK-MVE-NEXT:    smull r2, r1, r2, r1
-; CHECK-MVE-NEXT:    lsrl r2, r1, #31
-; CHECK-MVE-NEXT:    bic r1, r2, #1
-; CHECK-MVE-NEXT:    add r0, r1
-; CHECK-MVE-NEXT:    str r0, [r3]
-; CHECK-MVE-NEXT:    bx lr
-;
-; CHECK-NOMVE-LABEL: test1:
-; CHECK-NOMVE:       @ %bb.0: @ %entry
-; CHECK-NOMVE-NEXT:    ldr r1, [r1]
-; CHECK-NOMVE-NEXT:    ldr r2, [r2]
-; CHECK-NOMVE-NEXT:    ldr r0, [r0]
-; CHECK-NOMVE-NEXT:    smmul r1, r2, r1
-; CHECK-NOMVE-NEXT:    add.w r0, r0, r1, lsl #1
-; CHECK-NOMVE-NEXT:    str r0, [r3]
-; CHECK-NOMVE-NEXT:    bx lr
+; CHECK-LABEL: test1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    ldr r1, [r1]
+; CHECK-NEXT:    ldr r2, [r2]
+; CHECK-NEXT:    ldr r0, [r0]
+; CHECK-NEXT:    smmul r1, r2, r1
+; CHECK-NEXT:    add.w r0, r0, r1, lsl #1
+; CHECK-NEXT:    str r0, [r3]
+; CHECK-NEXT:    bx lr
 entry:
   %l3 = load i32, i32* %p0, align 4
   %l4 = load i32, i32* %p1, align 4
Index: llvm/test/CodeGen/Thumb2/shift_parts.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/shift_parts.ll
+++ llvm/test/CodeGen/Thumb2/shift_parts.ll
@@ -422,16 +422,10 @@
 }
 
 define i32 @ashr_demand_bottommask(i64 %x) {
-; CHECK-MVE-LABEL: ashr_demand_bottommask:
-; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    lsrl r0, r1, #31
-; CHECK-MVE-NEXT:    bic r0, r0, #1
-; CHECK-MVE-NEXT:    bx lr
-;
-; CHECK-NON-MVE-LABEL: ashr_demand_bottommask:
-; CHECK-NON-MVE:       @ %bb.0: @ %entry
-; CHECK-NON-MVE-NEXT:    lsls r0, r1, #1
-; CHECK-NON-MVE-NEXT:    bx lr
+; CHECK-LABEL: ashr_demand_bottommask:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsls r0, r1, #1
+; CHECK-NEXT:    bx lr
 entry:
   %shr = ashr i64 %x, 31
   %t = trunc i64 %shr to i32
@@ -440,16 +440,10 @@
 }
 
 define i32 @lshr_demand_bottommask(i64 %x) {
-; CHECK-MVE-LABEL: lshr_demand_bottommask:
-; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    lsrl r0, r1, #31
-; CHECK-MVE-NEXT:    bic r0, r0, #1
-; CHECK-MVE-NEXT:    bx lr
-;
-; CHECK-NON-MVE-LABEL: lshr_demand_bottommask:
-; CHECK-NON-MVE:       @ %bb.0: @ %entry
-; CHECK-NON-MVE-NEXT:    lsls r0, r1, #1
-; CHECK-NON-MVE-NEXT:    bx lr
+; CHECK-LABEL: lshr_demand_bottommask:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsls r0, r1, #1
+; CHECK-NEXT:    bx lr
 entry:
   %shr = lshr i64 %x, 31
   %t = trunc i64 %shr to i32
@@ -470,17 +458,11 @@
 }
 
 define i32 @ashr_demand_bottommask2(i64 %x) {
-; CHECK-MVE-LABEL: ashr_demand_bottommask2:
-; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    lsrl r0, r1, #31
-; CHECK-MVE-NEXT:    bic r0, r0, #3
-; CHECK-MVE-NEXT:    bx lr
-;
-; CHECK-NON-MVE-LABEL: ashr_demand_bottommask2:
-; CHECK-NON-MVE:       @ %bb.0: @ %entry
-; CHECK-NON-MVE-NEXT:    mvn r0, #2
-; CHECK-NON-MVE-NEXT:    and.w r0, r0, r1, lsl #1
-; CHECK-NON-MVE-NEXT:    bx lr
+; CHECK-LABEL: ashr_demand_bottommask2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    mvn r0, #2
+; CHECK-NEXT:    and.w r0, r0, r1, lsl #1
+; CHECK-NEXT:    bx lr
 entry:
   %shr = ashr i64 %x, 31
   %t = trunc i64 %shr to i32
@@ -489,17 +471,11 @@
 }
 
 define i32 @lshr_demand_bottommask2(i64 %x) {
-; CHECK-MVE-LABEL: lshr_demand_bottommask2:
-; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    lsrl r0, r1, #31
-; CHECK-MVE-NEXT:    bic r0, r0, #3
-; CHECK-MVE-NEXT:    bx lr
-;
-; CHECK-NON-MVE-LABEL: lshr_demand_bottommask2:
-; CHECK-NON-MVE:       @ %bb.0: @ %entry
-; CHECK-NON-MVE-NEXT:    mvn r0, #2
-; CHECK-NON-MVE-NEXT:    and.w r0, r0, r1, lsl #1
-; CHECK-NON-MVE-NEXT:    bx lr
+; CHECK-LABEL: lshr_demand_bottommask2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    mvn r0, #2
+; CHECK-NEXT:    and.w r0, r0, r1, lsl #1
+; CHECK-NEXT:    bx lr
 entry:
   %shr = lshr i64 %x, 31
   %t = trunc i64 %shr to i32