Index: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2309,6 +2309,11 @@
                                      Srl_imm)) {
         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
 
+        // Mask off the unnecessary bits of the AND immediate; normally
+        // DAGCombine will do this, but that might not happen if
+        // targetShrinkDemandedConstant chooses a different immediate.
+        And_imm &= -1U >> Srl_imm;
+
         // Note: The width operand is encoded as width-1.
         unsigned Width = countTrailingOnes(And_imm) - 1;
         unsigned LSB = Srl_imm;
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h
@@ -389,6 +389,9 @@
                                        const SelectionDAG &DAG,
                                        unsigned Depth) const override;
 
+    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+                                      TargetLoweringOpt &TLO) const override;
+
     bool ExpandInlineAsm(CallInst *CI) const override;
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -13602,6 +13602,83 @@
   }
 }
 
+bool
+ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
+                                                const APInt &DemandedAPInt,
+                                                TargetLoweringOpt &TLO) const {
+  // Delay optimization, so we don't have to deal with illegal types, or block
+  // optimizations.
+  if (!TLO.LegalOps)
+    return false;
+
+  // Only optimize AND for now.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
+  EVT VT = Op.getValueType();
+
+  // Ignore vectors.
+  if (VT.isVector())
+    return false;
+
+  assert(VT == MVT::i32 && "Unexpected integer type");
+
+  // Make sure the RHS really is a constant.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!C)
+    return false;
+
+  unsigned Mask = C->getZExtValue();
+
+  // If mask is zero, nothing to do.
+  if (!Mask)
+    return false;
+
+  unsigned Demanded = DemandedAPInt.getZExtValue();
+  unsigned ShrunkMask = Mask & Demanded;
+  unsigned ExpandedMask = Mask | ~Demanded;
+
+  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+  };
+  auto UseMask = [this, Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+    if (NewMask == Mask)
+      return true;
+    SDLoc DL(Op);
+    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+    return TLO.CombineTo(Op, NewOp);
+  };
+
+  // Prefer uxtb mask.
+  if (IsLegalMask(0xFF))
+    return UseMask(0xFF);
+
+  // Prefer uxth mask.
+  if (IsLegalMask(0xFFFF))
+    return UseMask(0xFFFF);
+
+  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if (ShrunkMask < 256)
+    return UseMask(ShrunkMask);
+
+  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+    return UseMask(ExpandedMask);
+
+  // Potential improvements:
+  //
+  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
+  // We could try to prefer Thumb1 immediates which can be lowered to a
+  // two-instruction sequence.
+  // We could try to recognize more legal ARM/Thumb2 immediates here.
+
+  return false;
+}
+
+
 //===----------------------------------------------------------------------===//
 //                           ARM Inline Assembly Support
 //===----------------------------------------------------------------------===//
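Note on the selection logic above: the interplay between ShrunkMask (bits that must stay set) and ExpandedMask (bits that may be set for free) is easiest to see with concrete numbers. The following standalone C++ sketch mirrors the same preference order (uxtb, uxth, movs+ands, movs+bics) and reproduces two of the test changes below; the pickMask helper is hypothetical and is not part of the patch.

#include <cstdio>

// Mirrors the candidate order of targetShrinkDemandedConstant above.
// Returns the replacement mask, or 0 if no cheaper mask was found.
static unsigned pickMask(unsigned Mask, unsigned Demanded) {
  unsigned Shrunk = Mask & Demanded;     // bits that must remain set
  unsigned Expanded = Mask | ~Demanded;  // bits that may be set harmlessly
  auto Legal = [&](unsigned Cand) {
    return (Shrunk & Cand) == Shrunk && (~Expanded & Cand) == 0;
  };
  if (Legal(0xFF))   return 0xFF;        // uxtb
  if (Legal(0xFFFF)) return 0xFFFF;      // uxth
  if (Shrunk < 256)  return Shrunk;      // Thumb1 movs+ands
  if ((int)Expanded <= -2 && (int)Expanded >= -256)
    return Expanded;                     // Thumb1 movs+bics
  return 0;                              // no improvement found
}

int main() {
  // Windows/alloca.ll: (x & ~7u) where the following lsrs #2 demands only
  // bits [2,31]. Expanded = ~7u | 3 = 0xFFFFFFFB = ~4u, hence "bic ..., #4".
  printf("%#x\n", pickMask(~7u, ~3u));
  // and-cmpz.ll i16_cmpz: (x & 0xFE00) with exactly 0xFE00 demanded; the
  // uxth mask 0xFFFF is legal because no disallowed bit can become set.
  printf("%#x\n", pickMask(0xFE00, 0xFE00));
  return 0;
}

Compiled with any C++11 compiler this prints 0xfffffffb and 0xffff, matching the bic #4 and uxth lines in the updated tests below.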
Index: llvm/trunk/test/CodeGen/ARM/Windows/alloca.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/Windows/alloca.ll
+++ llvm/trunk/test/CodeGen/ARM/Windows/alloca.ll
@@ -19,7 +19,7 @@
 ; because we do not have the kill flag on R0.
 ; CHECK: mov.w [[R1:lr]], #7
 ; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
-; CHECK: bic [[R0]], [[R0]], #7
+; CHECK: bic [[R0]], [[R0]], #4
 ; CHECK: lsrs r4, [[R0]], #2
 ; CHECK: bl __chkstk
 ; CHECK: sub.w sp, sp, r4
Index: llvm/trunk/test/CodeGen/ARM/Windows/vla.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/Windows/vla.ll
+++ llvm/trunk/test/CodeGen/ARM/Windows/vla.ll
@@ -14,13 +14,13 @@
 }
 
 ; CHECK-SMALL-CODE: adds [[R4:r[0-9]+]], #7
-; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #7
+; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #4
 ; CHECK-SMALL-CODE: lsrs r4, [[R4]], #2
 ; CHECK-SMALL-CODE: bl __chkstk
 ; CHECK-SMALL-CODE: sub.w sp, sp, r4
 
 ; CHECK-LARGE-CODE: adds [[R4:r[0-9]+]], #7
-; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #7
+; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #4
 ; CHECK-LARGE-CODE: lsrs r4, [[R4]], #2
 ; CHECK-LARGE-CODE: movw [[IP:r[0-9]+]], :lower16:__chkstk
 ; CHECK-LARGE-CODE: movt [[IP]], :upper16:__chkstk
Index: llvm/trunk/test/CodeGen/ARM/and-cmpz.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/and-cmpz.ll
+++ llvm/trunk/test/CodeGen/ARM/and-cmpz.ll
@@ -89,11 +89,10 @@
 }
 
 ; CHECK-LABEL: i16_cmpz:
-; T1:      movs r2, #127
-; T1-NEXT: lsls r2, r2, #9
-; T1-NEXT: ands r2, r0
-; T1-NEXT: lsrs r0, r2, #9
-; T2:      and r0, r0, #65024
+; T1:      uxth r0, r0
+; T1-NEXT: lsrs r0, r0, #9
+; T1-NEXT: bne
+; T2:      uxth r0, r0
 ; T2-NEXT: movs r2, #0
 ; T2-NEXT: cmp.w r2, r0, lsr #9
 define void @i16_cmpz(i16 %x, void (i32)* %foo) {
Index: llvm/trunk/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
+++ llvm/trunk/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
@@ -30,10 +30,8 @@
 ; LE-LABEL: i24_and_or:
 ; LE:       @ %bb.0:
 ; LE-NEXT:    ldrh r1, [r0]
-; LE-NEXT:    mov r2, #16256
-; LE-NEXT:    orr r2, r2, #49152
 ; LE-NEXT:    orr r1, r1, #384
-; LE-NEXT:    and r1, r1, r2
+; LE-NEXT:    bic r1, r1, #127
 ; LE-NEXT:    strh r1, [r0]
 ; LE-NEXT:    mov pc, lr
 ;
Index: llvm/trunk/test/CodeGen/ARM/select_const.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/select_const.ll
+++ llvm/trunk/test/CodeGen/ARM/select_const.ll
@@ -314,9 +314,8 @@
 ; CHECK-NEXT:    mov r1, #1
 ; CHECK-NEXT:    tst r0, #1
 ; CHECK-NEXT:    orr r1, r1, #65536
-; CHECK-NEXT:    mov r0, r1
-; CHECK-NEXT:    moveq r0, #23
-; CHECK-NEXT:    and r0, r0, r1
+; CHECK-NEXT:    moveq r1, #23
+; CHECK-NEXT:    bic r0, r1, #22
 ; CHECK-NEXT:    mov r1, #0
 ; CHECK-NEXT:    mov pc, lr
   %sel = select i1 %cond, i64 65537, i64 23
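The i24_and_or change above is the clearest instance of the new [-256, -2] case: only the low 16 bits survive the strh, so the 0xFF80 mask may grow to -128 and the old two-instruction mov/orr constant becomes a single bic. A standalone hand check (plain C++, not from the patch):

#include <cassert>

int main() {
  // Old mask: mov r2, #16256 ; orr r2, r2, #49152  ->  0x3F80 | 0xC000.
  unsigned Mask = 16256 | 49152;         // 0xFF80
  unsigned Demanded = 0xFFFF;            // strh keeps only the low half-word
  unsigned Expanded = Mask | ~Demanded;  // 0xFFFFFF80 == -128
  assert((int)Expanded <= -2 && (int)Expanded >= -256);
  // UseMask(-128) is an AND with ~127, emitted as "bic r1, r1, #127".
  return 0;
}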
Index: llvm/trunk/test/CodeGen/Thumb/bic_imm.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb/bic_imm.ll
+++ llvm/trunk/test/CodeGen/Thumb/bic_imm.ll
@@ -39,22 +39,77 @@
 define void @truncated(i16 %a, i16* %p) {
 ; CHECK-T1-LABEL: truncated:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    ldr r2, .LCPI2_0
+; CHECK-T1-NEXT:    movs r2, #128
+; CHECK-T1-NEXT:    bics r0, r2
+; CHECK-T1-NEXT:    strh r0, [r1]
+; CHECK-T1-NEXT:    bx lr
+;
+; CHECK-T2-LABEL: truncated:
+; CHECK-T2:       @ %bb.0:
+; CHECK-T2-NEXT:    bic r0, r0, #128
+; CHECK-T2-NEXT:    strh r0, [r1]
+; CHECK-T2-NEXT:    bx lr
+  %and = and i16 %a, -129
+  store i16 %and, i16* %p
+  ret void
+}
+
+define void @truncated_neg2(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg2:
+; CHECK-T1:       @ %bb.0:
+; CHECK-T1-NEXT:    movs r2, #1
+; CHECK-T1-NEXT:    bics r0, r2
+; CHECK-T1-NEXT:    strh r0, [r1]
+; CHECK-T1-NEXT:    bx lr
+;
+; CHECK-T2-LABEL: truncated_neg2:
+; CHECK-T2:       @ %bb.0:
+; CHECK-T2-NEXT:    bic r0, r0, #1
+; CHECK-T2-NEXT:    strh r0, [r1]
+; CHECK-T2-NEXT:    bx lr
+  %and = and i16 %a, -2
+  store i16 %and, i16* %p
+  ret void
+}
+
+define void @truncated_neg256(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg256:
+; CHECK-T1:       @ %bb.0:
+; CHECK-T1-NEXT:    movs r2, #255
+; CHECK-T1-NEXT:    bics r0, r2
+; CHECK-T1-NEXT:    strh r0, [r1]
+; CHECK-T1-NEXT:    bx lr
+;
+; CHECK-T2-LABEL: truncated_neg256:
+; CHECK-T2:       @ %bb.0:
+; CHECK-T2-NEXT:    bic r0, r0, #255
+; CHECK-T2-NEXT:    strh r0, [r1]
+; CHECK-T2-NEXT:    bx lr
+  %and = and i16 %a, -256
+  store i16 %and, i16* %p
+  ret void
+}
+
+; FIXME: Thumb2 supports "bic r0, r0, #510"
+define void @truncated_neg511(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg511:
+; CHECK-T1:       @ %bb.0:
+; CHECK-T1-NEXT:    ldr r2, .LCPI5_0
 ; CHECK-T1-NEXT:    ands r2, r0
 ; CHECK-T1-NEXT:    strh r2, [r1]
 ; CHECK-T1-NEXT:    bx lr
 ; CHECK-T1-NEXT:    .p2align 2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:  .LCPI2_0:
-; CHECK-T1-NEXT:    .long 65407 @ 0xff7f
+; CHECK-T1-NEXT:  .LCPI5_0:
+; CHECK-T1-NEXT:    .long 65025 @ 0xfe01
 ;
-; CHECK-T2-LABEL: truncated:
+; CHECK-T2-LABEL: truncated_neg511:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    movw r2, #65407
+; CHECK-T2-NEXT:    movw r2, #65025
 ; CHECK-T2-NEXT:    ands r0, r2
 ; CHECK-T2-NEXT:    strh r0, [r1]
 ; CHECK-T2-NEXT:    bx lr
-  %and = and i16 %a, -129
+  %and = and i16 %a, -511
   store i16 %and, i16* %p
   ret void
 }
Index: llvm/trunk/test/CodeGen/Thumb/shift-and.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb/shift-and.ll
+++ llvm/trunk/test/CodeGen/Thumb/shift-and.ll
@@ -46,7 +46,7 @@
 ; CHECK-LABEL: test4:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    lsls r0, r0, #4
-; CHECK-NEXT:    movs r1, #127
+; CHECK-NEXT:    movs r1, #112
 ; CHECK-NEXT:    bics r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
@@ -110,16 +110,24 @@
 define i32 @test9(i32 %x) {
 ; CHECK-LABEL: test9:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    lsrs r1, r0, #2
-; CHECK-NEXT:    ldr r0, .LCPI8_0
-; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    lsrs r0, r0, #2
+; CHECK-NEXT:    movs r1, #1
+; CHECK-NEXT:    bics r0, r1
 ; CHECK-NEXT:    bx lr
-; CHECK-NEXT:    .p2align 2
-; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI8_0:
-; CHECK-NEXT:    .long 1073741822 @ 0x3ffffffe
 entry:
   %and = lshr i32 %x, 2
   %shr = and i32 %and, 1073741822
   ret i32 %shr
 }
+
+define i32 @test10(i32 %x) {
+; CHECK-LABEL: test10:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsls r0, r0, #2
+; CHECK-NEXT:    uxtb r0, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl i32 %x, 2
+  %shr = and i32 %0, 255
+  ret i32 %shr
+}
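Finally, the bic_imm.ll pair truncated / truncated_neg511 shows both a hit and a deliberate miss of the new [-256, -2] case; here is the arithmetic traced by hand (standalone C++, not part of the patch):

#include <cassert>

int main() {
  unsigned Demanded = 0xFFFF;  // both results feed a strh

  // truncated: and i16 %a, -129 legalizes to an i32 AND with 0xFF7F.
  unsigned Expanded = 0xFF7F | ~Demanded;  // 0xFFFFFF7F == -129
  assert((int)Expanded >= -256 && (int)Expanded <= -2);
  // -> UseMask(-129): Thumb1 "movs r2, #128; bics r0, r2", Thumb2 bic #128.

  // truncated_neg511: and i16 %a, -511 legalizes to an i32 AND with 0xFE01.
  unsigned Shrunk = 0xFE01 & Demanded;     // 0xFE01, too big for movs+ands
  Expanded = 0xFE01 | ~Demanded;           // 0xFFFFFE01 == -511, below -256
  assert(Shrunk >= 256 && (int)Expanded < -256);
  // The uxtb/uxth masks are also illegal here (bit 0 must stay set, bits
  // [1,8] must stay clear), so the constant-pool load survives; hence the
  // FIXME above about Thumb2's "bic r0, r0, #510".
  return 0;
}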