Index: lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- lib/Target/ARM/ARMISelDAGToDAG.cpp +++ lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2767,6 +2767,52 @@ } } + auto CreateThumb1ShlSrl = [&](SDValue X, unsigned LslAmt, unsigned LsrAmt) { + SDValue ShlOps[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, + CurDAG->getTargetConstant(LslAmt, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; + auto Shl = CurDAG->getMachineNode(ARM::tLSLri, dl, MVT::i32, ShlOps); + SDValue SrlOps[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), + SDValue(Shl, 0), + CurDAG->getTargetConstant(LsrAmt, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; + return CurDAG->getMachineNode(ARM::tLSRri, dl, MVT::i32, SrlOps); + }; + + if (N1C && Subtarget->isThumb1Only()) { + uint32_t C1 = (uint32_t)N1C->getZExtValue(); + SDValue N0 = N->getOperand(0); + uint32_t C2 = 0; + + // Turn (and (srl x, c2) c1) -> (srl (shl x, c3-c2), c3) if c1 is a mask + // with c3 leading zeros, and this isn't a uxtb or uxth. + // + // Thumb1-only for now: Thumb2 has ubfx, and it isn't obviously + // profitable in ARM mode. + if (N0.hasOneUse() && isOpcWithIntImmediate(N0.getNode(), ISD::SRL, C2) && + C2 < 32 && isMask_32(C1) && C1 != 255 && C1 != 65535) { + uint32_t C3 = countLeadingZeros(C1); + // C2 >= C3 would imply the AND is a no-op, but guard against it anyway. + if (C2 < C3) { + ReplaceNode(N, CreateThumb1ShlSrl(N0.getOperand(0), C3 - C2, C3)); + return; + } + } + + // Turn (and (shl x, c2) c1) -> (srl (shl x, c2+c3), c3) if c1 is a mask + // shifted by c2 bits with c3 leading zeros. + // + // Thumb1-only for now; other targets probably prefer and+lsl. 
+ if (N0.hasOneUse() && isOpcWithIntImmediate(N0.getNode(), ISD::SHL, C2) && + C2 < 32 && isShiftedMask_32(C1)) { + uint32_t C3 = countLeadingZeros(C1); + if (C2 + C3 < 32 && C1 == ((-1U << (C2 + C3)) >> C3)) { + ReplaceNode(N, CreateThumb1ShlSrl(N0.getOperand(0), C2 + C3, C3)); + return; + } + } + } + // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits // are entirely contributed by c2 and lower 16-bits are entirely contributed Index: test/CodeGen/Thumb/shift-and.ll =================================================================== --- /dev/null +++ test/CodeGen/Thumb/shift-and.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s + +define i32 @test1(i32 %x) { +; CHECK-LABEL: test1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsls r0, r0, #20 +; CHECK-NEXT: lsrs r0, r0, #22 +; CHECK-NEXT: bx lr +entry: + %0 = lshr i32 %x, 2 + %shr = and i32 %0, 1023 + ret i32 %shr +} + +define i32 @test2(i32 %x) { +; CHECK-LABEL: test2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsrs r1, r0, #2 +; CHECK-NEXT: ldr r0, .LCPI1_0 +; CHECK-NEXT: ands r0, r1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .long 1022 @ 0x3fe +entry: + %0 = lshr i32 %x, 2 + %shr = and i32 %0, 1022 + ret i32 %shr +} + +define i32 @test3(i32 %x) { +; CHECK-LABEL: test3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsls r0, r0, #31 +; CHECK-NEXT: lsrs r0, r0, #2 +; CHECK-NEXT: bx lr +entry: + %0 = shl i32 %x, 29 + %shr = and i32 %0, 536870912 + ret i32 %shr +} + +define i32 @test4(i32 %x) { +; CHECK-LABEL: test4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movs r1, #5 +; CHECK-NEXT: lsls r1, r1, #29 +; CHECK-NEXT: lsls r0, r0, #29 +; CHECK-NEXT: ands r0, r1 +; CHECK-NEXT: bx lr +entry: + %0 = shl i32 %x, 29 + %shr = and i32 %0, 2684354560 + ret i32 %shr 
+}