Since ROLBRd needs an implicit R1 (on AVR) or an implicit R17 (on AVRTiny),
we split ROLBRd to ROLBRdR1 (on AVR) and ROLBRdR17 (on AVRTiny).
Details
Diff Detail
Event Timeline
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | ||
---|---|---|
1486 | Hmm, this feels kinda excessive - wouldn't adding let Uses = [R1] to ROLBRd be sufficient? let Uses = [R1] in def ROLBRd : Pseudo<(outs GPR8 : $rd), (ins GPR8 : $src), "rolb\t$rd", [(set i8 : $rd, (AVRrol i8 : $src)), (implicit SREG)]>; |
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | ||
---|---|---|
1486 | Because it maybe R17 on AVRTINY. |
llvm/test/CodeGen/AVR/rotate.ll | ||
---|---|---|
67 ↗ | (On Diff #528747) | We need not worry this, since I have made optimization for that, https://reviews.llvm.org/D152130 |
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | ||
---|---|---|
1486 | ah, true, true |
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | ||
---|---|---|
1486 | Maybe we could have two separate instructions, then? Something like: diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 726ed7303746..94cabb4e1387 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -1481,7 +1481,48 @@ bool AVRExpandPseudo::expand<AVR::POPWRd>(Block &MBB, BlockIt MBBI) { } template <> -bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) { +bool AVRExpandPseudo::expand<AVR::ROLBRdNT>(Block &MBB, BlockIt MBBI) { + // In AVR, the rotate instructions behave quite unintuitively. They rotate + // bits through the carry bit in SREG, effectively rotating over 9 bits, + // instead of 8. This is useful when we are dealing with numbers over + // multiple registers, but when we actually need to rotate stuff, we have + // to explicitly add the carry bit. + + const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>(); + + MachineInstr &MI = *MBBI; + unsigned OpShift, OpCarry; + Register DstReg = MI.getOperand(0).getReg(); + Register ZeroReg = STI.getZeroRegister(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + OpShift = AVR::ADDRdRr; + OpCarry = AVR::ADCRdRr; + + // add r16, r16 + // adc r16, r1 + + // Shift part + buildMI(MBB, MBBI, OpShift) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, RegState::Kill); + + // Add the carry bit + auto MIB = buildMI(MBB, MBBI, OpCarry) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(ZeroReg); + + MIB->getOperand(3).setIsDead(); // SREG is always dead + MIB->getOperand(4).setIsKill(); // SREG is always implicitly killed + + MI.eraseFromParent(); + return true; +} + +template <> +bool AVRExpandPseudo::expand<AVR::ROLBRdT>(Block &MBB, BlockIt MBBI) { // In AVR, the rotate instructions behave quite unintuitively. They rotate // bits through the carry bit in SREG, effectively rotating over 9 bits, // instead of 8. This is useful when we are dealing with numbers over @@ -2605,7 +2646,8 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) { EXPAND(AVR::OUTWARr); EXPAND(AVR::PUSHWRr); EXPAND(AVR::POPWRd); - EXPAND(AVR::ROLBRd); + EXPAND(AVR::ROLBRdNT); + EXPAND(AVR::ROLBRdT); EXPAND(AVR::RORBRd); EXPAND(AVR::LSLWRd); EXPAND(AVR::LSRWRd); diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index e44ef51ab3a8..d97b02681f2e 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1758,6 +1758,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, MachineFunction *F = BB->getParent(); MachineRegisterInfo &RI = F->getRegInfo(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + const AVRSubtarget &STI = F->getSubtarget<AVRSubtarget>(); DebugLoc dl = MI.getDebugLoc(); switch (MI.getOpcode()) { @@ -1789,7 +1790,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, RC = &AVR::DREGSRegClass; break; case AVR::Rol8: - Opc = AVR::ROLBRd; + Opc = STI.hasTinyEncoding() ? AVR::ROLBRdT : AVR::ROLBRdNT; RC = &AVR::GPR8RegClass; break; case AVR::Rol16: diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index d0e75733114a..630369d2d25e 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -2029,7 +2029,8 @@ let Constraints = "$src = $rd", Defs = [SREG] in { def ASRWLoRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "asrwlo\t$rd", [(set i16:$rd, (AVRasrlo i16:$src)), (implicit SREG)]>; - def ROLBRd : Pseudo<(outs GPR8 + let Uses = [R1] in + def ROLBRdNT : Pseudo<(outs GPR8 : $rd), (ins GPR8 : $src), @@ -2037,7 +2038,20 @@ let Constraints = "$src = $rd", Defs = [SREG] in { [(set i8 : $rd, (AVRrol i8 : $src)), - (implicit SREG)]>; + (implicit SREG)]>, + Requires<[HasNonTinyEncoding]>; + + let Uses = [R17] in + def ROLBRdT : Pseudo<(outs GPR8 + : $rd), + (ins GPR8 + : $src), + "rolb\t$rd", + [(set i8 + : $rd, (AVRrol i8 + : $src)), + (implicit SREG)]>, + Requires<[HasTinyEncoding]>; def RORBRd : Pseudo<(outs GPR8 : $rd), Of course, this naive approach copy-pastes the code responsible for expanding the pseudo-instruction, but that could should be pretty easy to de-duplicate. |
This looks correct at first glance, but of course a bit inefficient.
I see two options to make this efficient:
- As proposed by @Patryk27, using two different instructions to differentiate between R1 and R17. This most likely works just fine.
- Custom lowering for ROLB that adds the zero register. There are many custom lowered instructions, this could be one of them.
I have a slight preference for option 1 (as it's perhaps a little bit more explicit), but either way would work.
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | ||
---|---|---|
1481 | Nit: this is eor, not xor. | |
1486 | You can use this: buildMI(MBB, MBBI, AVR::EORRdRr, TmpReg) .addReg(TmpReg) .addReg(TmpReg); The third parameter to buildMI is the register to define. |
Nit: this is eor, not xor.