Since ROLBRd needs an implicit R1 (on AVR) or an implicit R17 (on AVRTiny),
we split ROLBRd to ROLBRdR1 (on AVR) and ROLBRdR17 (on AVRTiny).
Details
Diff Detail
- Repository
- rG LLVM Github Monorepo
Event Timeline
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | ||
---|---|---|
1505 | Hmm, this feels kinda excessive - wouldn't adding let Uses = [R1] to ROLBRd be sufficient? let Uses = [R1] in def ROLBRd : Pseudo<(outs GPR8 : $rd), (ins GPR8 : $src), "rolb\t$rd", [(set i8 : $rd, (AVRrol i8 : $src)), (implicit SREG)]>; |
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | ||
---|---|---|
1505 | Because it maybe R17 on AVRTINY. |
llvm/test/CodeGen/AVR/rotate.ll | ||
---|---|---|
67 ↗ | (On Diff #528747) | We need not worry this, since I have made optimization for that, https://reviews.llvm.org/D152130 |
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | ||
---|---|---|
1505 | ah, true, true |
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | ||
---|---|---|
1505 | Maybe we could have two separate instructions, then? Something like: diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 726ed7303746..94cabb4e1387 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -1481,7 +1481,48 @@ bool AVRExpandPseudo::expand<AVR::POPWRd>(Block &MBB, BlockIt MBBI) { } template <> -bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) { +bool AVRExpandPseudo::expand<AVR::ROLBRdNT>(Block &MBB, BlockIt MBBI) { + // In AVR, the rotate instructions behave quite unintuitively. They rotate + // bits through the carry bit in SREG, effectively rotating over 9 bits, + // instead of 8. This is useful when we are dealing with numbers over + // multiple registers, but when we actually need to rotate stuff, we have + // to explicitly add the carry bit. + + const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>(); + + MachineInstr &MI = *MBBI; + unsigned OpShift, OpCarry; + Register DstReg = MI.getOperand(0).getReg(); + Register ZeroReg = STI.getZeroRegister(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + OpShift = AVR::ADDRdRr; + OpCarry = AVR::ADCRdRr; + + // add r16, r16 + // adc r16, r1 + + // Shift part + buildMI(MBB, MBBI, OpShift) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, RegState::Kill); + + // Add the carry bit + auto MIB = buildMI(MBB, MBBI, OpCarry) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(ZeroReg); + + MIB->getOperand(3).setIsDead(); // SREG is always dead + MIB->getOperand(4).setIsKill(); // SREG is always implicitly killed + + MI.eraseFromParent(); + return true; +} + +template <> +bool AVRExpandPseudo::expand<AVR::ROLBRdT>(Block &MBB, BlockIt MBBI) { // In AVR, the rotate instructions behave quite unintuitively. They rotate // bits through the carry bit in SREG, effectively rotating over 9 bits, // instead of 8. This is useful when we are dealing with numbers over @@ -2605,7 +2646,8 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) { EXPAND(AVR::OUTWARr); EXPAND(AVR::PUSHWRr); EXPAND(AVR::POPWRd); - EXPAND(AVR::ROLBRd); + EXPAND(AVR::ROLBRdNT); + EXPAND(AVR::ROLBRdT); EXPAND(AVR::RORBRd); EXPAND(AVR::LSLWRd); EXPAND(AVR::LSRWRd); diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index e44ef51ab3a8..d97b02681f2e 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1758,6 +1758,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, MachineFunction *F = BB->getParent(); MachineRegisterInfo &RI = F->getRegInfo(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + const AVRSubtarget &STI = F->getSubtarget<AVRSubtarget>(); DebugLoc dl = MI.getDebugLoc(); switch (MI.getOpcode()) { @@ -1789,7 +1790,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, RC = &AVR::DREGSRegClass; break; case AVR::Rol8: - Opc = AVR::ROLBRd; + Opc = STI.hasTinyEncoding() ? AVR::ROLBRdT : AVR::ROLBRdNT; RC = &AVR::GPR8RegClass; break; case AVR::Rol16: diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index d0e75733114a..630369d2d25e 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -2029,7 +2029,8 @@ let Constraints = "$src = $rd", Defs = [SREG] in { def ASRWLoRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "asrwlo\t$rd", [(set i16:$rd, (AVRasrlo i16:$src)), (implicit SREG)]>; - def ROLBRd : Pseudo<(outs GPR8 + let Uses = [R1] in + def ROLBRdNT : Pseudo<(outs GPR8 : $rd), (ins GPR8 : $src), @@ -2037,7 +2038,20 @@ let Constraints = "$src = $rd", Defs = [SREG] in { [(set i8 : $rd, (AVRrol i8 : $src)), - (implicit SREG)]>; + (implicit SREG)]>, + Requires<[HasNonTinyEncoding]>; + + let Uses = [R17] in + def ROLBRdT : Pseudo<(outs GPR8 + : $rd), + (ins GPR8 + : $src), + "rolb\t$rd", + [(set i8 + : $rd, (AVRrol i8 + : $src)), + (implicit SREG)]>, + Requires<[HasTinyEncoding]>; def RORBRd : Pseudo<(outs GPR8 : $rd), Of course, this naive approach copy-pastes the code responsible for expanding the pseudo-instruction, but that could should be pretty easy to de-duplicate. |
This looks correct at first glance, but of course a bit inefficient.
I see two options to make this efficient:
- As proposed by @Patryk27, using two different instructions to differentiate between R1 and R17. This most likely works just fine.
- Custom lowering for ROLB that adds the zero register. There are many custom lowered instructions, this could be one of them.
I have a slight preference for option 1 (as it's perhaps a little bit more explicit), but either way would work.
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | ||
---|---|---|
1500 | Nit: this is eor, not xor. | |
1505 | You can use this: buildMI(MBB, MBBI, AVR::EORRdRr, TmpReg) .addReg(TmpReg) .addReg(TmpReg); The third parameter to buildMI is the register to define. |
Nit: this is eor, not xor.