Index: llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -1364,6 +1364,37 @@
   return true;
 }
 
+/// Expands the LSLW8Rd pseudo (16-bit logical shift left by 8 bits):
+/// Rd.hi = Rd.lo, then Rd.lo = 0.
+template <>
+bool AVRExpandPseudo::expand<AVR::LSLW8Rd>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstLoReg, DstHiReg;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(2).isDead();
+  TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+  // Move lower byte to upper byte.
+  buildMI(MBB, MBBI, AVR::MOVRdRr)
+      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstLoReg);
+
+  // Clear lower byte.
+  auto MIBLO = buildMI(MBB, MBBI, AVR::EORRdRr)
+      .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstLoReg, getKillRegState(DstIsKill))
+      .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+  // Operand 3 follows the three explicit ones (presumably the SREG def).
+  if (ImpIsDead)
+    MIBLO->getOperand(3).setIsDead();
+
+  MI.eraseFromParent();
+  return true;
+}
+
 template <>
 bool AVRExpandPseudo::expand<AVR::LSRWRd>(Block &MBB, BlockIt MBBI) {
   MachineInstr &MI = *MBBI;
@@ -1395,6 +1426,37 @@
   return true;
 }
 
+/// Expands the LSRW8Rd pseudo (16-bit logical shift right by 8 bits):
+/// Rd.lo = Rd.hi, then Rd.hi = 0.
+template <>
+bool AVRExpandPseudo::expand<AVR::LSRW8Rd>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstLoReg, DstHiReg;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(2).isDead();
+  TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+  // Move upper byte to lower byte.
+  buildMI(MBB, MBBI, AVR::MOVRdRr)
+      .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstHiReg);
+
+  // Clear upper byte.
+  auto MIBHI = buildMI(MBB, MBBI, AVR::EORRdRr)
+      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstHiReg, getKillRegState(DstIsKill))
+      .addReg(DstHiReg, getKillRegState(DstIsKill));
+
+  // Operand 3 follows the three explicit ones (presumably the SREG def).
+  if (ImpIsDead)
+    MIBHI->getOperand(3).setIsDead();
+
+  MI.eraseFromParent();
+  return true;
+}
+
 template <>
 bool AVRExpandPseudo::expand<AVR::RORWRd>(Block &MBB, BlockIt MBBI) {
   llvm_unreachable("RORW unimplemented");
@@ -1438,6 +1500,46 @@
   return true;
 }
 
+/// Expands the ASRW8Rd pseudo (16-bit arithmetic shift right by 8 bits):
+/// Rd.lo = Rd.hi, then Rd.hi is set to 0 or -1 from the sign bit.
+template <>
+bool AVRExpandPseudo::expand<AVR::ASRW8Rd>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstLoReg, DstHiReg;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(2).isDead();
+  TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+  // Move upper byte to lower byte. The upper byte is read again by the
+  // instructions below, so it must not be marked as killed here.
+  buildMI(MBB, MBBI, AVR::MOVRdRr)
+      .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstHiReg);
+
+  // Move the sign bit to the C flag. The defined register is the first
+  // operand, and it is not marked dead because SBC reads it below; kill
+  // flags are optional and are left off here.
+  buildMI(MBB, MBBI, AVR::ADDRdRr)
+      .addReg(DstHiReg, RegState::Define)
+      .addReg(DstHiReg)
+      .addReg(DstHiReg);
+
+  // Set upper byte to 0 or -1.
+  auto MIBHI = buildMI(MBB, MBBI, AVR::SBCRdRr)
+      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstHiReg, getKillRegState(DstIsKill))
+      .addReg(DstHiReg, getKillRegState(DstIsKill));
+
+  // Operand 3 follows the three explicit ones (presumably the SREG def).
+  if (ImpIsDead)
+    MIBHI->getOperand(3).setIsDead();
+
+  MI.eraseFromParent();
+  return true;
+}
+
 template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
   MachineInstr &MI = *MBBI;
   Register DstLoReg, DstHiReg;
@@ -1654,10 +1756,13 @@
     EXPAND(AVR::ROLBRd);
     EXPAND(AVR::RORBRd);
     EXPAND(AVR::LSLWRd);
+    EXPAND(AVR::LSLW8Rd);
     EXPAND(AVR::LSRWRd);
+    EXPAND(AVR::LSRW8Rd);
     EXPAND(AVR::RORWRd);
     EXPAND(AVR::ROLWRd);
     EXPAND(AVR::ASRWRd);
+    EXPAND(AVR::ASRW8Rd);
     EXPAND(AVR::SEXT);
     EXPAND(AVR::ZEXT);
     EXPAND(AVR::SPREAD);
Index: llvm/lib/Target/AVR/AVRISelLowering.h
===================================================================
--- llvm/lib/Target/AVR/AVRISelLowering.h
+++ llvm/lib/Target/AVR/AVRISelLowering.h
@@ -36,8 +36,11 @@
   /// TargetExternalSymbol, and TargetGlobalAddress.
   WRAPPER,
   LSL,     ///< Logical shift left.
+  LSL8,    ///< Logical shift left 8 bits.
   LSR,     ///< Logical shift right.
+  LSR8,    ///< Logical shift right 8 bits.
   ASR,     ///< Arithmetic shift right.
+  ASR8,    ///< Arithmetic shift right 8 bits.
   ROR,     ///< Bit rotate right.
   ROL,     ///< Bit rotate left.
   LSLLOOP, ///< A loop of single logical shift left instructions.
Index: llvm/lib/Target/AVR/AVRISelLowering.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -334,6 +334,25 @@
     llvm_unreachable("Invalid shift opcode");
   }
 
+  // Optimize 16-bit int shift when ShiftAmount >= 8.
+  if (VT.getSizeInBits() == 16 && ShiftAmount >= 8)
+    switch (Op.getOpcode()) {
+    case ISD::SHL:
+      Victim = DAG.getNode(AVRISD::LSL8, dl, VT, Victim);
+      ShiftAmount -= 8;
+      break;
+    case ISD::SRL:
+      Victim = DAG.getNode(AVRISD::LSR8, dl, VT, Victim);
+      ShiftAmount -= 8;
+      break;
+    case ISD::SRA:
+      Victim = DAG.getNode(AVRISD::ASR8, dl, VT, Victim);
+      ShiftAmount -= 8;
+      break;
+    default:
+      break;
+    }
+
   while (ShiftAmount--) {
     Victim = DAG.getNode(Opc8, dl, VT, Victim);
   }
Index: llvm/lib/Target/AVR/AVRInstrInfo.td
===================================================================
--- llvm/lib/Target/AVR/AVRInstrInfo.td
+++ llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -55,10 +55,13 @@
 
 // Shift nodes.
 def AVRlsl : SDNode<"AVRISD::LSL", SDTIntUnaryOp>;
+def AVRlsl8 : SDNode<"AVRISD::LSL8", SDTIntUnaryOp>;
 def AVRlsr : SDNode<"AVRISD::LSR", SDTIntUnaryOp>;
+def AVRlsr8 : SDNode<"AVRISD::LSR8", SDTIntUnaryOp>;
 def AVRrol : SDNode<"AVRISD::ROL", SDTIntUnaryOp>;
 def AVRror : SDNode<"AVRISD::ROR", SDTIntUnaryOp>;
 def AVRasr : SDNode<"AVRISD::ASR", SDTIntUnaryOp>;
+def AVRasr8 : SDNode<"AVRISD::ASR8", SDTIntUnaryOp>;
 
 // Pseudo shift nodes for non-constant shift amounts.
 def AVRlslLoop : SDNode<"AVRISD::LSLLOOP", SDTIntShiftOp>;
@@ -1653,6 +1656,11 @@
                       "lslw\t$rd",
                       [(set i16:$rd, (AVRlsl i16:$src)), (implicit SREG)]>;
 
+  def LSLW8Rd : Pseudo<(outs DREGS:$rd),
+                       (ins DREGS:$src),
+                       "lslw8\t$rd",
+                       [(set i16:$rd, (AVRlsl8 i16:$src)), (implicit SREG)]>;
+
   def LSRRd : FRd<0b1001,
                   0b0100110,
                   (outs GPR8:$rd),
@@ -1665,6 +1673,11 @@
                       "lsrw\t$rd",
                       [(set i16:$rd, (AVRlsr i16:$src)), (implicit SREG)]>;
 
+  def LSRW8Rd : Pseudo<(outs DREGS:$rd),
+                       (ins DREGS:$src),
+                       "lsrw8\t$rd",
+                       [(set i16:$rd, (AVRlsr8 i16:$src)), (implicit SREG)]>;
+
   def ASRRd : FRd<0b1001,
                   0b0100101,
                   (outs GPR8:$rd),
@@ -1677,6 +1690,11 @@
                       "asrw\t$rd",
                       [(set i16:$rd, (AVRasr i16:$src)), (implicit SREG)]>;
 
+  def ASRW8Rd : Pseudo<(outs DREGS:$rd),
+                       (ins DREGS:$src),
+                       "asrw8\t$rd",
+                       [(set i16:$rd, (AVRasr8 i16:$src)), (implicit SREG)]>;
+
   // Bit rotate operations.
   let Uses = [SREG] in {
 
@@ -2092,12 +2110,8 @@
 def : Pat<(i16 (AVRWrapper tblockaddress:$dst)),
           (LDIWRdK tblockaddress:$dst)>;
 
 // hi-reg truncation : trunc(int16 >> 8)
-//:FIXME: i think it's better to emit an extract subreg node in the DAG than
-// all this mess once we get optimal shift code
-// lol... I think so, too. [@agnat]
-def : Pat<(i8 (trunc (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr
-                     (AVRlsr DREGS:$src)))))))))),
+def : Pat<(i8 (trunc (AVRlsr8 DREGS:$src))),
           (EXTRACT_SUBREG DREGS:$src, sub_hi)>;
 
 // :FIXME: DAGCombiner produces an shl node after legalization from these seq:
Index: llvm/test/CodeGen/AVR/shift.ll
===================================================================
--- llvm/test/CodeGen/AVR/shift.ll
+++ llvm/test/CodeGen/AVR/shift.ll
@@ -1,8 +1,42 @@
 ; RUN: llc < %s -march=avr | FileCheck %s
 
-; CHECK-LABEL: shift_i64_i64
 define i64 @shift_i64_i64(i64 %a, i64 %b) {
-  ; CHECK: call    __ashldi3
+; CHECK-LABEL: shift_i64_i64
+; CHECK:       call    __ashldi3
   %result = shl i64 %a, %b
   ret i64 %result
 }
+
+define i16 @lsl_i16(i16 %a) {
+; CHECK-LABEL: lsl_i16
+; CHECK:       mov r25, r24
+; CHECK-NEXT:  clr r24
+; CHECK-NEXT:  lsl r24
+; CHECK-NEXT:  rol r25
+; CHECK-NEXT:  ret
+  %result = shl i16 %a, 9
+  ret i16 %result
+}
+
+define i16 @lsr_i16(i16 %a) {
+; CHECK-LABEL: lsr_i16
+; CHECK:       mov r24, r25
+; CHECK-NEXT:  clr r25
+; CHECK-NEXT:  lsr r25
+; CHECK-NEXT:  ror r24
+; CHECK-NEXT:  ret
+  %result = lshr i16 %a, 9
+  ret i16 %result
+}
+
+define i16 @asr_i16(i16 %a) {
+; CHECK-LABEL: asr_i16
+; CHECK:       mov r24, r25
+; CHECK-NEXT:  lsl r25
+; CHECK-NEXT:  sbc r25, r25
+; CHECK-NEXT:  asr r25
+; CHECK-NEXT:  ror r24
+; CHECK-NEXT:  ret
+  %result = ashr i16 %a, 9
+  ret i16 %result
+}