Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -369,6 +369,7 @@
 private:
   bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                              SDValue &Shift);
+  bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
   bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                                SDValue &OffImm) {
     return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
@@ -606,6 +607,81 @@
   return false;
 }
 
+/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
+/// to allow selecting more shifted-register operands.
+bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
+                                                       SDValue &Shift) {
+  EVT VT = N.getValueType();
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return false;
+
+  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
+    return false;
+  SDValue LHS = N.getOperand(0);
+  if (!LHS->hasOneUse())
+    return false;
+
+  unsigned LHSOpcode = LHS->getOpcode();
+  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
+    return false;
+
+  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+  if (!ShiftAmtNode)
+    return false;
+
+  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
+  SDValue RHS = N.getOperand(1);
+  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
+  if (!RHSC)
+    return false;
+
+  APInt AndMask = RHSC->getAPIntValue();
+  unsigned LowZBits, MaskLen;
+  if (!AndMask.isShiftedMask(LowZBits, MaskLen))
+    return false;
+
+  unsigned BitWidth = N.getValueSizeInBits();
+  SDLoc DL(LHS);
+  uint64_t NewShiftC;
+  unsigned NewShiftOp;
+  if (LHSOpcode == ISD::SHL) {
+    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
+      return false;
+
+    NewShiftC = LowZBits - ShiftAmtC;
+    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
+  } else {
+    if (LowZBits == 0)
+      return false;
+
+    NewShiftC = LowZBits + ShiftAmtC;
+    if (NewShiftC >= BitWidth)
+      return false;
+
+    if (LHSOpcode == ISD::SRA &&
+        ((BitWidth != (LowZBits + MaskLen)) || (BitWidth <= NewShiftC)))
+      return false;
+
+    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
+      return false;
+
+    if (LHSOpcode == ISD::SRL)
+      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
+    else
+      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
+  }
+
+  assert(NewShiftC < BitWidth && "Invalid shift amount");
+  SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
+  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
+  Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
+                                       NewShiftAmt, BitWidthMinus1),
+                0);
+  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
+  Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
+  return true;
+}
+
 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
 /// instructions allow the shifted register to be rotated, but the arithmetic
@@ -613,6 +689,9 @@
 /// supported.
 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                 SDValue &Reg, SDValue &Shift) {
+  if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
+    return true;
+
   AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
   if (ShType == AArch64_AM::InvalidShiftExtend)
     return false;
Index: llvm/test/CodeGen/AArch64/shiftregister-from-and.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/shiftregister-from-and.ll
@@ -0,0 +1,291 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+; logic shift reg pattern: and
+; already optimized by another pattern
+
+define i64 @and_shiftedreg_from_and(i64 %a, i64 %b) {
+; CHECK-LABEL: and_shiftedreg_from_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x1, x0, asr #23
+; CHECK-NEXT:    and x0, x8, #0xffffffffff000000
+; CHECK-NEXT:    ret
+  %ashr = ashr i64 %a, 23
+  %and = and i64 %ashr, -16777216
+  %r = and i64 %b, %and
+  ret i64 %r
+}
+
+; TODO: logic shift reg pattern: bic
+
+define i64 @bic_shiftedreg_from_and(i64 %a, i64 %b) {
+; CHECK-LABEL: bic_shiftedreg_from_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #16777215
+; CHECK-NEXT:    orn x8, x8, x0, asr #23
+; CHECK-NEXT:    and x0, x1, x8
+; CHECK-NEXT:    ret
+  %ashr = ashr i64 %a, 23
+  %and = and i64 %ashr, -16777216
+  %not = xor i64 %and, -1
+  %r = and i64 %b, %not
+  ret i64 %r
+}
+
+; logic shift reg pattern: eon
+
+define i64 @eon_shiftedreg_from_and(i64 %a, i64 %b) {
+; CHECK-LABEL: eon_shiftedreg_from_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr x8, x0, #17
+; CHECK-NEXT:    eon x0, x1, x8, lsl #53
+; CHECK-NEXT:    ret
+  %shl = shl i64 %a, 36
+  %and = and i64 %shl, -9007199254740992
+  %xor = xor i64 %and, -1
+  %r = xor i64 %b, %xor
+  ret i64 %r
+}
+
+; logic shift reg pattern: eor
+
+define i64 @eor_shiftedreg_from_and(i64 %a, i64 %b) {
+; CHECK-LABEL: eor_shiftedreg_from_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr x8, x0, #47
+; CHECK-NEXT:    eor x0, x1, x8, lsl #24
+; CHECK-NEXT:    ret
+  %lshr = lshr i64 %a, 23
+  %and = and i64 %lshr, 2199006478336
+  %or = xor i64 %and, %b
+  ret i64 %or
+}
+
+; logic shift reg pattern: mvn
+; already optimized by another pattern
+
+define i64 @mvn_shiftedreg_from_and(i64 %a) {
+; CHECK-LABEL: mvn_shiftedreg_from_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #9007199254740991
+; CHECK-NEXT:    orn x0, x8, x0, lsl #36
+; CHECK-NEXT:    ret
+  %shl = shl i64 %a, 36
+  %and = and i64 %shl, -9007199254740992
+  %xor = xor i64 %and, -1
+  ret i64 %xor
+}
+
+; logic shift reg pattern: orn
+; already optimized by another pattern
+
+define i64 @orn_shiftedreg_from_and(i64 %a, i64 %b) {
+; CHECK-LABEL: orn_shiftedreg_from_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orn x8, x1, x0, lsr #23
+; CHECK-NEXT:    orr x0, x8, #0xfffffe0000ffffff
+; CHECK-NEXT:    ret
+  %lshr = lshr i64 %a, 23
+  %and = and i64 %lshr, 2199006478336
+  %not = xor i64 %and, -1
+  %or = or i64 %not, %b
+  ret i64 %or
+}
+
+; logic shift reg pattern: orr
+; srl constant bitwidth == (lowbits + masklen + shiftamt)
+
+define i64 @orr_shiftedreg_from_and(i64 %a, i64 %b) {
+; CHECK-LABEL: orr_shiftedreg_from_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr x8, x0, #47
+; CHECK-NEXT:    orr x0, x1, x8, lsl #24
+; CHECK-NEXT:    ret
+  %lshr = lshr i64 %a, 23
+  %and = and i64 %lshr, 2199006478336 ; 0x1ffff000000
+  %or = or i64 %and, %b
+  ret i64 %or
+}
+
+; logic shift reg pattern: orr
+; srl constant bitwidth < (lowbits + masklen + shiftamt)
+
+define i64 @orr_shiftedreg_from_and_mask2(i64 %a, i64 %b) {
+; CHECK-LABEL: orr_shiftedreg_from_and_mask2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr x8, x0, #47
+; CHECK-NEXT:    orr x0, x1, x8, lsl #24
+; CHECK-NEXT:    ret
+  %lshr = lshr i64 %a, 23
+  %and = and i64 %lshr, 4398029733888 ; 0x3ffff000000
+  %or = or i64 %and, %b
+  ret i64 %or
+}
+
+
+; arithmetic shift reg pattern: add
+
+define i32 @add_shiftedreg_from_and(i32 %a, i32 %b) {
+; CHECK-LABEL: add_shiftedreg_from_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr w8, w0, #27
+; CHECK-NEXT:    add w0, w1, w8, lsl #24
+; CHECK-NEXT:    ret
+  %ashr = ashr i32 %a, 3
+  %and = and i32 %ashr, -16777216
+  %add = add i32 %and, %b
+  ret i32 %add
+}
+
+; arithmetic shift reg pattern: sub
+
+define i64 @sub_shiftedreg_from_and_shl(i64 %a, i64 %b) {
+; CHECK-LABEL: sub_shiftedreg_from_and_shl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr x8, x0, #17
+; CHECK-NEXT:    sub x0, x1, x8, lsl #53
+; CHECK-NEXT:    ret
+  %shl = shl i64 %a, 36
+  %and = and i64 %shl, -9007199254740992
+  %sub = sub i64 %b, %and
+  ret i64 %sub
+}
+
+; negative test: type is not i32 or i64
+
+define <2 x i32> @shiftedreg_from_and_negative_type(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: shiftedreg_from_and_negative_type:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.2s, v0.2s, #2
+; CHECK-NEXT:    bic v0.2s, #31
+; CHECK-NEXT:    sub v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    ret
+  %shl = shl <2 x i32> %a, <i32 2, i32 2>
+  %and = and <2 x i32> %shl, <i32 -32, i32 -32>
+  %sub = sub <2 x i32> %b, %and
+  ret <2 x i32> %sub
+}
+
+; negative test: shift one-use
+
+define i32 @shiftedreg_from_and_negative_oneuse1(i32 %a, i32 %b) {
+; CHECK-LABEL: shiftedreg_from_and_negative_oneuse1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr w8, w0, #23
+; CHECK-NEXT:    and w9, w8, #0xff000000
+; CHECK-NEXT:    add w9, w9, w1
+; CHECK-NEXT:    mul w0, w8, w9
+; CHECK-NEXT:    ret
+  %ashr = ashr i32 %a, 23
+  %and = and i32 %ashr, -16777216
+  %add = add i32 %and, %b
+  %r = mul i32 %ashr, %add
+  ret i32 %r
+}
+
+; negative test: and one-use
+
+define i32 @shiftedreg_from_and_negative_oneuse2(i32 %a, i32 %b) {
+; CHECK-LABEL: shiftedreg_from_and_negative_oneuse2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr w8, w0, #23
+; CHECK-NEXT:    and w8, w8, #0xff000000
+; CHECK-NEXT:    add w9, w8, w1
+; CHECK-NEXT:    mul w0, w8, w9
+; CHECK-NEXT:    ret
+  %ashr = ashr i32 %a, 23
+  %and = and i32 %ashr, -16777216
+  %add = add i32 %and, %b
+  %r = mul i32 %and, %add
+  ret i32 %r
+}
+
+; negative test: and c is not mask
+
+define i32 @shiftedreg_from_and_negative_andc1(i32 %a, i32 %b) {
+; CHECK-LABEL: shiftedreg_from_and_negative_andc1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #26215
+; CHECK-NEXT:    movk w8, #65510, lsl #16
+; CHECK-NEXT:    and w8, w8, w0, asr #23
+; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    ret
+  %ashr = ashr i32 %a, 23
+  %and = and i32 %ashr, -1677721
+  %add = add i32 %and, %b
+  ret i32 %add
+}
+
+; negative test: sra with and c is not legal mask
+
+define i32 @shiftedreg_from_and_negative_andc2(i32 %a, i32 %b) {
+; CHECK-LABEL: shiftedreg_from_and_negative_andc2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-285212672
+; CHECK-NEXT:    and w8, w8, w0, asr #23
+; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    ret
+  %ashr = ashr i32 %a, 23
+  %and = and i32 %ashr, 4009754624 ; 0xef000000
+  %add = add i32 %and, %b
+  ret i32 %add
+}
+
+; negative test: shl with and c is not legal mask
+
+define i64 @shiftedreg_from_and_negative_andc3(i64 %a, i64 %b) {
+; CHECK-LABEL: shiftedreg_from_and_negative_andc3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x0, x1, x0, lsl #36
+; CHECK-NEXT:    ret
+  %shl = shl i64 %a, 36
+  %and = and i64 %shl, -4294967296
+  %xor = xor i64 %and, %b
+  ret i64 %xor
+}
+
+; negative test: shl with and c is not legal mask
+
+define i64 @shiftedreg_from_and_negative_andc4(i64 %a, i64 %b) {
+; CHECK-LABEL: shiftedreg_from_and_negative_andc4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl x8, x0, #36
+; CHECK-NEXT:    and x8, x8, #0x7fe0000000000000
+; CHECK-NEXT:    eor x0, x8, x1
+; CHECK-NEXT:    ret
+  %shl = shl i64 %a, 36
+  %and = and i64 %shl, 9214364837600034816
+  %xor = xor i64 %and, %b
+  ret i64 %xor
+}
+
+; negative test: sra with and c is not legal mask
+
+define i32 @shiftedreg_from_and_negative_andc5(i32 %a, i32 %b) {
+; CHECK-LABEL: shiftedreg_from_and_negative_andc5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr w8, w0, #23
+; CHECK-NEXT:    and w8, w8, #0xff000000
+; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    ret
+  %ashr = ashr i32 %a, 23
+  %and = and i32 %ashr, -16777216
+  %add = add i32 %and, %b
+  ret i32 %add
+}
+
+; negative test: srl with and c is not legal mask
+; srl constant bitwidth > (lowbits + masklen + shiftamt)
+
+define i64 @shiftedreg_from_and_negative_andc6(i64 %a, i64 %b) {
+; CHECK-LABEL: shiftedreg_from_and_negative_andc6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr x8, x0, #2
+; CHECK-NEXT:    and x8, x8, #0x6
+; CHECK-NEXT:    add x0, x8, x1
+; CHECK-NEXT:    ret
+  %lshr = lshr i64 %a, 2
+  %and = and i64 %lshr, 6
+  %add = add i64 %and, %b
+  ret i64 %add
+}
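
For reference, a minimal standalone C++ sketch (not part of the patch; written only to illustrate the rewrite) that checks the bit-level identity behind the SRL case exercised by @orr_shiftedreg_from_and: with ShiftAmtC = 23 and mask 0x1ffff000000 (LowZBits = 24, MaskLen = 17), the new code picks NewShiftC = LowZBits + ShiftAmtC = 47 and an LSL #24 shifted-register operand, i.e. "and (srl x, 23), 0x1ffff000000" becomes "shl (srl x, 47), 24".

// Illustrative check only, not part of the patch: verify
//   and (srl x, 23), 0x1ffff000000  ==  shl (srl x, 47), 24
// which corresponds to the lsr x8, x0, #47 / orr x0, x1, x8, lsl #24
// sequence expected by the @orr_shiftedreg_from_and test above.
#include <cassert>
#include <cstdint>
#include <random>

int main() {
  std::mt19937_64 Rng(0);
  for (int I = 0; I < 1000000; ++I) {
    uint64_t X = Rng();
    uint64_t Before = (X >> 23) & 0x1ffff000000ULL; // and (srl x, 23), mask
    uint64_t After = (X >> 47) << 24;               // shl (srl x, 47), 24
    assert(Before == After);
  }
  return 0;
}

Compiled with assertions enabled, the loop runs silently, confirming that the folded shifted-register form computes the same value as the original shift-and-mask sequence.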