Index: llvm/lib/Target/AVR/AVRInstrInfo.h
===================================================================
--- llvm/lib/Target/AVR/AVRInstrInfo.h
+++ llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -90,6 +90,14 @@
   unsigned isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
 
+  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+                      Register &SrcReg2, int64_t &CmpMask,
+                      int64_t &CmpValue) const override;
+
+  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
+                            const MachineRegisterInfo *MRI) const override;
+
   // Branch analysis.
   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
Index: llvm/lib/Target/AVR/AVRInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -187,6 +187,147 @@
       .addMemOperand(MMO);
 }
 
+// Analyze a compare instruction. This is mainly used by the peephole optimizer
+// before calling optimizeCompareInstr.
+bool AVRInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+                                  Register &SrcReg2, int64_t &CmpMask,
+                                  int64_t &CmpValue) const {
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case AVR::CPIRdK:
+    SrcReg = MI.getOperand(0).getReg();
+    SrcReg2 = 0;
+    CmpMask = ~0;
+    CmpValue = MI.getOperand(1).getImm();
+    return true;
+  }
+
+  // There are more compare instructions, but they're not implemented yet.
+  return false;
+}
+
+// Returns true if this instruction has a single 8-bit output register and it
+// sets the zero flag in SREG according to this output.
+// (There are other instructions that affect the zero flag, but not in this
+// specific way.)
+static bool setsZeroFlag(MachineInstr *Instr) {
+  switch (Instr->getOpcode()) {
+  default:
+    return false;
+  case AVR::ADCRdRr:
+  case AVR::ADDRdRr:
+  case AVR::ANDRdRr:
+  case AVR::ANDIRdK:
+  case AVR::ASRRd:
+  case AVR::COMRd:
+  case AVR::DECRd:
+  case AVR::EORRdRr:
+  case AVR::INCRd:
+  case AVR::LSRRd:
+  case AVR::NEGRd:
+  case AVR::ORRdRr:
+  case AVR::ORIRdK:
+  case AVR::RORRd:
+  case AVR::SUBRdRr:
+  case AVR::SUBIRdK:
+    return true;
+  }
+}
+
+// Optimize compare instructions. At the moment, it can only remove a
+// comparison against zero (cpi rN, 0x00) where only the Z flag of SREG is used
+// and a previous instruction (like andi) already sets this flag.
+bool AVRInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+                                        Register SrcReg2, int64_t CmpMask,
+                                        int64_t CmpValue,
+                                        const MachineRegisterInfo *MRI) const {
+  // This function is pretty large. The first part checks all the preconditions
+  // for removing the compare instruction. Only at the very end (when all
+  // checks pass) is the compare instruction removed.
+
+  // Check whether this is an instruction of the form 'cpi rN, 0x00'.
+  if (CmpInstr.getOpcode() != AVR::CPIRdK)
+    return false; // only optimize CPI instructions
+  if (CmpMask != ~0 || CmpValue != 0)
+    return false; // currently we only support optimizing comparisons against 0
+
+  // Find the instruction that defines the register input for cpi.
+  MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
+
+  if (!setsZeroFlag(SrcRegDef))
+    // This is some other instruction, like ld.
+    return false;
+
+  // Iterate over the instructions before the compare instruction and check
+  // that none of them modify SREG.
+  // TODO: we could allow instructions that leave the Z flag unchanged, like
+  // sei.
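+  // (For example, an intervening 'add' clobbers SREG and blocks the
+  // optimization, while an intervening 'ldi' does not touch SREG and is
+  // skipped over; see clobber_between and noclobber_between in fold-cmp.mir.)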
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
+  bool FoundDef = false;
+  MachineBasicBlock::reverse_iterator From =
+      std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
+  for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) {
+    if (&Inst == SrcRegDef) {
+      FoundDef = true;
+      break;
+    }
+    if (Inst.modifiesRegister(AVR::SREG, TRI))
+      return false;
+  }
+  if (!FoundDef)
+    // We reached the start of the basic block without finding the defining
+    // instruction, so it must live in a different block. This is likely
+    // uncommon, so it is not worth traversing further.
+    return false;
+
+  // Check the instructions that follow the compare instruction. If they read
+  // the SREG register, they may only use the zero flag.
+  bool FlagsMayLiveOut = true;
+  MachineBasicBlock::iterator AfterCmpInstr =
+      std::next(MachineBasicBlock::iterator(CmpInstr));
+  for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) {
+    switch (Instr.getOpcode()) {
+    case AVR::BRNEk:
+    case AVR::BREQk:
+      // These instructions only use the zero flag.
+      if (Instr.getOperand(0).getMBB()->isLiveIn(AVR::SREG))
+        // Unlikely, but to be sure: check that the branch target doesn't use
+        // the current SREG value.
+        return false;
+      continue;
+    }
+    if (Instr.readsRegister(AVR::SREG, TRI))
+      // This instruction might read the Z flag.
+      return false;
+    if (Instr.definesRegister(AVR::SREG, TRI)) {
+      // The SREG register is overwritten, in theory also including the Z flag.
+      // Some instructions modify SREG only partially without declaring that
+      // they also read it, so this isn't strictly speaking safe, but the
+      // compiler doesn't appear to rely on that anywhere.
+      FlagsMayLiveOut = false;
+      break;
+    }
+  }
+
+  // If the flags may still be live at the end of the block, check whether a
+  // successor block uses the SREG register. This is unlikely, but make sure to
+  // handle the case correctly anyway.
+  if (FlagsMayLiveOut) {
+    for (MachineBasicBlock *Successor : CmpMBB.successors())
+      if (Successor->isLiveIn(AVR::SREG))
+        return false;
+  }
+
+  // We can safely remove the comparison instruction!
+  CmpInstr.eraseFromParent();
+
+  // The implicit SREG output operand may have been set as dead. After this
+  // transformation, it is not dead anymore.
+  SrcRegDef->findRegisterDefOperand(AVR::SREG)->setIsDead(false);
+
+  return true;
+}
+
 const MCInstrDesc &AVRInstrInfo::getBrCond(AVRCC::CondCodes CC) const {
   switch (CC) {
   default:
Index: llvm/lib/Target/AVR/AVRInstrInfo.td
===================================================================
--- llvm/lib/Target/AVR/AVRInstrInfo.td
+++ llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1015,7 +1015,7 @@
 //===----------------------------------------------------------------------===//
 // Compare operations.
 //===----------------------------------------------------------------------===//
-let Defs = [SREG] in {
+let Defs = [SREG], isCompare = 1 in {
   // CPSE Rd, Rr
   // Compare Rd and Rr, skipping the next instruction if they are equal.
   let isBarrier = 1, isBranch = 1,
Index: llvm/test/CodeGen/AVR/fold-cmp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AVR/fold-cmp.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=attiny85 < %s -mtriple=avr | FileCheck %s
+
+; This file checks whether cpi is optimized away. The full test is in
+; fold-cmp.mir. This just checks that it also works for the whole pipeline.
+
+; Check whether the SREG flags of the 'and' instruction are used directly in
+; the following conditional branch.
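+; Without this optimization, a redundant compare against zero (cpi) would be
+; emitted between the 'andi' and the 'breq'.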
+define i8 @bitsAreZero(i8 %val) {
+; CHECK-LABEL: bitsAreZero:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: andi r24, 6
+; CHECK-NEXT: breq .LBB0_2
+; CHECK-NEXT: ; %bb.1: ; %else
+; CHECK-NEXT: ldi r24, 2
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: ; %then
+; CHECK-NEXT: ldi r24, 1
+; CHECK-NEXT: ret
+entry:
+  %bits = and i8 %val, 6
+  %cmp = icmp eq i8 %bits, 0
+  br i1 %cmp, label %then, label %else
+then:
+  ret i8 1
+else:
+  ret i8 2
+}
Index: llvm/test/CodeGen/AVR/fold-cmp.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AVR/fold-cmp.mir
@@ -0,0 +1,84 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -run-pass=peephole-opt -mtriple=avr %s -o - | FileCheck %s
+
+---
+name: common_case
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: common_case
+    ; CHECK: successors: %bb.0(0x80000000)
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ld8 = COPY $r0
+    ; CHECK-NEXT: [[ANDIRdK:%[0-9]+]]:ld8 = ANDIRdK [[COPY]], 3, implicit-def $sreg
+    ; CHECK-NEXT: BRNEk %bb.0, implicit $sreg
+    ; CHECK-NEXT: RET
+    %0:ld8 = COPY $r0
+    %1:ld8 = ANDIRdK %0:ld8, 3, implicit-def dead $sreg
+    CPIRdK killed %1:ld8, 0, implicit-def $sreg
+    BRNEk %bb.0.entry, implicit $sreg
+    RET
+...
+---
+# A clobber between the andi and cpi instructions blocks the optimization.
+name: clobber_between
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: clobber_between
+    ; CHECK: successors: %bb.0(0x80000000)
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ld8 = COPY $r0
+    ; CHECK-NEXT: [[ANDIRdK:%[0-9]+]]:ld8 = ANDIRdK [[COPY]], 3, implicit-def dead $sreg
+    ; CHECK-NEXT: [[ADDRdRr:%[0-9]+]]:ld8 = ADDRdRr [[COPY]], [[COPY]], implicit-def dead $sreg
+    ; CHECK-NEXT: CPIRdK killed [[ANDIRdK]], 0, implicit-def $sreg
+    ; CHECK-NEXT: BRNEk %bb.0, implicit $sreg
+    ; CHECK-NEXT: RET
+    %0:ld8 = COPY $r0
+    %1:ld8 = ANDIRdK %0:ld8, 3, implicit-def dead $sreg
+    %2:ld8 = ADDRdRr %0:ld8, %0:ld8, implicit-def dead $sreg
+    CPIRdK killed %1:ld8, 0, implicit-def $sreg
+    BRNEk %bb.0.entry, implicit $sreg
+    RET
+...
+---
+# Other instructions (like ldi) don't clobber SREG.
+name: noclobber_between
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: noclobber_between
+    ; CHECK: successors: %bb.0(0x80000000)
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ld8 = COPY $r0
+    ; CHECK-NEXT: [[ANDIRdK:%[0-9]+]]:ld8 = ANDIRdK [[COPY]], 3, implicit-def $sreg
+    ; CHECK-NEXT: [[LDIRdK:%[0-9]+]]:ld8 = LDIRdK 5
+    ; CHECK-NEXT: [[LDIRdK1:%[0-9]+]]:ld8 = LDIRdK 6
+    ; CHECK-NEXT: BRNEk %bb.0, implicit $sreg
+    ; CHECK-NEXT: RET
+    %0:ld8 = COPY $r0
+    %1:ld8 = ANDIRdK %0:ld8, 3, implicit-def dead $sreg
+    %2:ld8 = LDIRdK 5
+    CPIRdK killed %1:ld8, 0, implicit-def $sreg
+    %3:ld8 = LDIRdK 6
+    BRNEk %bb.0.entry, implicit $sreg
+    RET
+...
+---
+# SREG uses block the optimization (even though in this case BLD doesn't use
+# the zero bit).
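+# (BLD only reads the T flag, but the check in optimizeCompareInstr is
+# conservative: any instruction that reads SREG blocks the fold.)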
+name: use_after
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: use_after
+    ; CHECK: successors: %bb.0(0x80000000)
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ld8 = COPY $r0
+    ; CHECK-NEXT: [[ANDIRdK:%[0-9]+]]:ld8 = ANDIRdK [[COPY]], 3, implicit-def dead $sreg
+    ; CHECK-NEXT: CPIRdK killed [[ANDIRdK]], 0, implicit-def $sreg
+    ; CHECK-NEXT: [[BLD:%[0-9]+]]:ld8 = BLD [[ANDIRdK]], 3, implicit $sreg
+    ; CHECK-NEXT: BRNEk %bb.0, implicit $sreg
+    ; CHECK-NEXT: RET
+    %0:ld8 = COPY $r0
+    %1:ld8 = ANDIRdK %0:ld8, 3, implicit-def dead $sreg
+    CPIRdK killed %1:ld8, 0, implicit-def $sreg
+    %2:ld8 = BLD %1:ld8, 3, implicit $sreg
+    BRNEk %bb.0.entry, implicit $sreg
+    RET