Index: llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -168,6 +168,7 @@
   bool tryBitfieldExtractOpFromSExt(SDNode *N);
   bool tryBitfieldInsertOp(SDNode *N);
   bool tryBitfieldInsertInZeroOp(SDNode *N);
+  bool tryShiftAmountMod(SDNode *N);
 
   bool tryReadRegister(SDNode *N);
   bool tryWriteRegister(SDNode *N);
@@ -2441,6 +2442,121 @@
   return true;
 }
 
+/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
+/// variable shift/rotate instructions.
+///
+/// Handles i32/i64 ISD::ROTR, SHL, SRL and SRA. The shift amount, looked at
+/// through a zero/any-extend, must have one of these forms:
+///   - X + N or X - N, constant N == 0 mod Size: shift by X.
+///   - N - X, constant N != 0 and N == 0 mod Size: shift by 0 - X.
+///   - X & Mask, constant Mask with >= Bits trailing ones: shift by X.
+/// Size is the width of the shift in bits (32 or 64); Bits is 5 or 6.
+///
+/// Returns true if N was selected to a variable shift/rotate instruction,
+/// false otherwise.
+bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  unsigned Opc;
+  switch (N->getOpcode()) {
+  case ISD::ROTR:
+    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
+    break;
+  case ISD::SHL:
+    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
+    break;
+  case ISD::SRL:
+    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
+    break;
+  case ISD::SRA:
+    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
+    break;
+  default:
+    return false;
+  }
+
+  uint64_t Size;
+  uint64_t Bits;
+  if (VT == MVT::i32) {
+    Bits = 5;
+    Size = 32;
+  } else if (VT == MVT::i64) {
+    Bits = 6;
+    Size = 64;
+  } else
+    return false;
+
+  SDValue ShiftAmt = N->getOperand(1);
+  SDLoc DL(N);
+  SDValue NewShiftAmt;
+
+  // Skip over a zero/any-extend of the shift amount; only its low bits matter.
+  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
+      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
+    ShiftAmt = ShiftAmt->getOperand(0);
+
+  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
+    SDValue Add0 = ShiftAmt->getOperand(0);
+    SDValue Add1 = ShiftAmt->getOperand(1);
+    uint64_t Add0Imm;
+    uint64_t Add1Imm;
+    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
+    // to avoid the ADD/SUB.
+    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
+      NewShiftAmt = Add0;
+    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+    // generate a NEG instead of a SUB of a constant.
+    else if (ShiftAmt->getOpcode() == ISD::SUB &&
+             isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
+             (Add0Imm % Size == 0)) {
+      unsigned NegOpc;
+      unsigned ZeroReg;
+      EVT SubVT = ShiftAmt->getValueType(0);
+      if (SubVT == MVT::i32) {
+        NegOpc = AArch64::SUBWrr;
+        ZeroReg = AArch64::WZR;
+      } else {
+        assert(SubVT == MVT::i64);
+        NegOpc = AArch64::SUBXrr;
+        ZeroReg = AArch64::XZR;
+      }
+      SDValue Zero =
+          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
+      MachineSDNode *Neg =
+          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
+      NewShiftAmt = SDValue(Neg, 0);
+    } else
+      return false;
+  } else {
+    // If the shift amount is masked with an AND, check that the mask covers the
+    // bits that are implicitly ANDed off by the above opcodes and if so, skip
+    // the AND.
+    uint64_t MaskImm;
+    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
+      return false;
+
+    if (countTrailingOnes(MaskImm) < Bits)
+      return false;
+
+    NewShiftAmt = ShiftAmt->getOperand(0);
+  }
+
+  // Narrow/widen the shift amount to match the size of the shift operation.
+  if (VT == MVT::i32)
+    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
+  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
+    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
+    MachineSDNode *Ext = CurDAG->getMachineNode(
+        AArch64::SUBREG_TO_REG, DL, VT,
+        CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
+    NewShiftAmt = SDValue(Ext, 0);
+  }
+
+  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
+  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+  return true;
+}
+
 bool
 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                               unsigned RegWidth) {
@@ -2707,6 +2823,11 @@
       return;
     if (tryBitfieldInsertInZeroOp(Node))
       return;
+    LLVM_FALLTHROUGH;
+  case ISD::ROTR:
+  case ISD::SHL:
+    if (tryShiftAmountMod(Node))
+      return;
     break;
 
   case ISD::SIGN_EXTEND:
Index: llvm/trunk/test/CodeGen/AArch64/arm64-long-shift.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-long-shift.ll
+++ llvm/trunk/test/CodeGen/AArch64/arm64-long-shift.ll
@@ -2,19 +2,17 @@
 
 define i128 @shl(i128 %r, i128 %s) nounwind readnone {
 ; CHECK-LABEL: shl:
-; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
-; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
 ; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]]
 ; CHECK: cmp x2, #0
 ; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq
 ; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2
 ; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]]
+; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, x2
 ; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
-; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, [[EXTRA_SHIFT]]
 ; CHECK: cmp [[EXTRA_SHIFT]], #0
 ; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge
-; CHECK: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
-; CHECK: csel x0, xzr, [[SMALLSHIFT_LO]], ge
+; CHECK: csel x0, xzr, [[HI_BIG_SHIFT]], ge
 ; CHECK: ret
 
   %shl = shl i128 %r, %s
@@ -23,20 +21,18 @@
 
 define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
 ; CHECK-LABEL: ashr:
-; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
-; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
 ; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
 ; CHECK: cmp x2, #0
 ; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
 ; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
 ; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
+; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2
 ; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
-; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
 ; CHECK: cmp [[EXTRA_SHIFT]], #0
 ; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
-; CHECK: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
 ; CHECK: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
-; CHECK: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
+; CHECK: csel x1, [[BIGSHIFT_HI]], [[LO_BIG_SHIFT]], ge
 ; CHECK: ret
 
   %shr = ashr i128 %r, %s
@@ -45,19 +41,16 @@
 
 define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
 ; CHECK-LABEL: lshr:
-; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
-; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
 ; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
 ; CHECK: cmp x2, #0
 ; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
 ; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
 ; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
-; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
-; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
+; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2
-; CHECK: cmp [[EXTRA_SHIFT]], #0
+; CHECK: cmp {{x[0-9]+}}, #0
 ; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
-; CHECK: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
-; CHECK: csel x1, xzr, [[SMALLSHIFT_HI]], ge
+; CHECK: csel x1, xzr, [[LO_BIG_SHIFT]], ge
 ; CHECK: ret
 
   %shr = lshr i128 %r, %s
Index: llvm/trunk/test/CodeGen/AArch64/shift-mod.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/shift-mod.ll
+++ llvm/trunk/test/CodeGen/AArch64/shift-mod.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+; Check that we optimize out AND instructions and ADD/SUB instructions
+; modulo the shift size to take advantage of the implicit mod done on
+; the shift amount value by the variable shift/rotate instructions.
+
+; 32-bit lshr whose amount is truncated from i64: expect a variable lsr with
+; no 'and' emitted before it.
+define i32 @test1(i32 %x, i64 %y) {
+; CHECK-LABEL: test1:
+; CHECK-NOT: and
+; CHECK: lsr
+  %sh_prom = trunc i64 %y to i32
+  %shr = lshr i32 %x, %sh_prom
+  ret i32 %shr
+}
+
+; 64-bit ashr by zext(64 - x): expect neg followed by asr, with no orr or sub
+; emitted before the neg.
+define i64 @test2(i32 %x, i64 %y) {
+; CHECK-LABEL: test2:
+; CHECK-NOT: orr
+; CHECK-NOT: sub
+; CHECK: neg
+; CHECK: asr
+  %sub9 = sub nsw i32 64, %x
+  %sh_prom12.i = zext i32 %sub9 to i64
+  %shr.i = ashr i64 %y, %sh_prom12.i
+  ret i64 %shr.i
+}
+
+; 64-bit shl by (64 + x): expect a variable lsl with no add emitted before it.
+define i64 @test3(i64 %x, i64 %y) {
+; CHECK-LABEL: test3:
+; CHECK-NOT: add
+; CHECK: lsl
+  %add = add nsw i64 64, %x
+  %shl = shl i64 %y, %add
+  ret i64 %shl
+}