Index: lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2309,6 +2309,11 @@
                                   Srl_imm)) {
       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
 
+      // Mask off the unnecessary bits of the AND immediate; normally
+      // DAGCombine will do this, but that might not happen if
+      // targetShrinkDemandedConstant chooses a different immediate.
+      And_imm &= -1U >> Srl_imm;
+
       // Note: The width operand is encoded as width-1.
       unsigned Width = countTrailingOnes(And_imm) - 1;
       unsigned LSB = Srl_imm;
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -389,6 +389,9 @@
                                        const SelectionDAG &DAG,
                                        unsigned Depth) const override;
 
+    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+                                      TargetLoweringOpt &TLO) const override;
+
     bool ExpandInlineAsm(CallInst *CI) const override;
 
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -13481,6 +13481,83 @@
   }
 }
 
+bool
+ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
+                                                const APInt &DemandedAPInt,
+                                                TargetLoweringOpt &TLO) const {
+  // Delay optimization, so we don't have to deal with illegal types, or block
+  // optimizations.
+  if (!TLO.LegalOps)
+    return false;
+
+  // Only optimize AND for now.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
+  EVT VT = Op.getValueType();
+
+  // Ignore vectors.
+  if (VT.isVector())
+    return false;
+
+  assert(VT == MVT::i32 && "Unexpected integer type");
+
+  // Make sure the RHS really is a constant.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!C)
+    return false;
+
+  unsigned Mask = C->getZExtValue();
+
+  // If mask is zero, nothing to do.
+  if (!Mask)
+    return false;
+
+  unsigned Demanded = DemandedAPInt.getZExtValue();
+  unsigned ShrunkMask = Mask & Demanded;
+  unsigned ExpandedMask = Mask | ~Demanded;
+
+  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+  };
+  auto UseMask = [this, Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+    if (NewMask == Mask)
+      return true;
+    SDLoc DL(Op);
+    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+    return TLO.CombineTo(Op, NewOp);
+  };
+
+  // Prefer uxtb mask.
+  if (IsLegalMask(0xFF))
+    return UseMask(0xFF);
+
+  // Prefer uxth mask.
+  if (IsLegalMask(0xFFFF))
+    return UseMask(0xFFFF);
+
+  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if (ShrunkMask < 256)
+    return UseMask(ShrunkMask);
+
+  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+    return UseMask(ExpandedMask);
+
+  // Potential improvements:
+  //
+  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
+  // We could try to prefer Thumb1 immediates which can be lowered to a
+  // two-instruction sequence.
+  // We could try to recognize more legal ARM/Thumb2 immediates here.
+
+  return false;
+}
+
+
 //===----------------------------------------------------------------------===//
 // ARM Inline Assembly Support
 //===----------------------------------------------------------------------===//
Index: test/CodeGen/ARM/Windows/alloca.ll
===================================================================
--- test/CodeGen/ARM/Windows/alloca.ll
+++ test/CodeGen/ARM/Windows/alloca.ll
@@ -19,7 +19,7 @@
 ; because we do not have the kill flag on R0.
 ; CHECK: mov.w [[R1:lr]], #7
 ; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
-; CHECK: bic [[R0]], [[R0]], #7
+; CHECK: bic [[R0]], [[R0]], #4
 ; CHECK: lsrs r4, [[R0]], #2
 ; CHECK: bl __chkstk
 ; CHECK: sub.w sp, sp, r4
Index: test/CodeGen/ARM/Windows/vla.ll
===================================================================
--- test/CodeGen/ARM/Windows/vla.ll
+++ test/CodeGen/ARM/Windows/vla.ll
@@ -14,13 +14,13 @@
 }
 
 ; CHECK-SMALL-CODE: adds [[R4:r[0-9]+]], #7
-; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #7
+; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #4
 ; CHECK-SMALL-CODE: lsrs r4, [[R4]], #2
 ; CHECK-SMALL-CODE: bl __chkstk
 ; CHECK-SMALL-CODE: sub.w sp, sp, r4
 
 ; CHECK-LARGE-CODE: adds [[R4:r[0-9]+]], #7
-; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #7
+; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #4
 ; CHECK-LARGE-CODE: lsrs r4, [[R4]], #2
 ; CHECK-LARGE-CODE: movw [[IP:r[0-9]+]], :lower16:__chkstk
 ; CHECK-LARGE-CODE: movt [[IP]], :upper16:__chkstk
Index: test/CodeGen/ARM/and-cmpz.ll
===================================================================
--- test/CodeGen/ARM/and-cmpz.ll
+++ test/CodeGen/ARM/and-cmpz.ll
@@ -89,11 +89,10 @@
 }
 
 ; CHECK-LABEL: i16_cmpz:
-; T1:      movs r2, #127
-; T1-NEXT: lsls r2, r2, #9
-; T1-NEXT: ands r2, r0
-; T1-NEXT: lsrs r0, r2, #9
-; T2:      and r0, r0, #65024
+; T1:      uxth r0, r0
+; T1-NEXT: lsrs r0, r0, #9
+; T1-NEXT: bne
+; T2:      uxth r0, r0
 ; T2-NEXT: movs r2, #0
 ; T2-NEXT: cmp.w r2, r0, lsr #9
 define void @i16_cmpz(i16 %x, void (i32)* %foo) {
Index: test/CodeGen/ARM/illegal-bitfield-loadstore.ll
===================================================================
--- test/CodeGen/ARM/illegal-bitfield-loadstore.ll
+++ test/CodeGen/ARM/illegal-bitfield-loadstore.ll
@@ -30,10 +30,8 @@
 ; LE-LABEL: i24_and_or:
 ; LE:       @ %bb.0:
 ; LE-NEXT:    ldrh r1, [r0]
-; LE-NEXT:    mov r2, #16256
-; LE-NEXT:    orr r2, r2, #49152
 ; LE-NEXT:    orr r1, r1, #384
-; LE-NEXT:    and r1, r1, r2
+; LE-NEXT:    bic r1, r1, #127
 ; LE-NEXT:    strh r1, [r0]
 ; LE-NEXT:    mov pc, lr
 ;
Index: test/CodeGen/ARM/select_const.ll
===================================================================
--- test/CodeGen/ARM/select_const.ll
+++ test/CodeGen/ARM/select_const.ll
@@ -314,9 +314,8 @@
 ; CHECK-NEXT:    mov r1, #1
 ; CHECK-NEXT:    tst r0, #1
 ; CHECK-NEXT:    orr r1, r1, #65536
-; CHECK-NEXT:    mov r0, r1
-; CHECK-NEXT:    moveq r0, #23
-; CHECK-NEXT:    and r0, r0, r1
+; CHECK-NEXT:    moveq r1, #23
+; CHECK-NEXT:    bic r0, r1, #22
 ; CHECK-NEXT:    mov r1, #0
 ; CHECK-NEXT:    mov pc, lr
 %sel = select i1 %cond, i64 65537, i64 23