diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -6384,7 +6384,6 @@ llvm/lib/Target/AVR/AVRRegisterInfo.cpp llvm/lib/Target/AVR/AVRRegisterInfo.h llvm/lib/Target/AVR/AVRSelectionDAGInfo.h -llvm/lib/Target/AVR/AVRShiftExpand.cpp llvm/lib/Target/AVR/AVRSubtarget.cpp llvm/lib/Target/AVR/AVRSubtarget.h llvm/lib/Target/AVR/AVRTargetMachine.cpp diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h --- a/llvm/lib/Target/AVR/AVR.h +++ b/llvm/lib/Target/AVR/AVR.h @@ -25,7 +25,6 @@ class FunctionPass; class PassRegistry; -Pass *createAVRShiftExpandPass(); FunctionPass *createAVRISelDag(AVRTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createAVRExpandPseudoPass(); @@ -34,7 +33,6 @@ void initializeAVRDAGToDAGISelPass(PassRegistry &); void initializeAVRExpandPseudoPass(PassRegistry &); -void initializeAVRShiftExpandPass(PassRegistry &); /// Contains the AVR backend. namespace AVR { diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -229,6 +229,11 @@ return false; } +/// Returns whether the given logic operation effectively does not depend on +/// its first argument. +/// +/// For instance, `reg & 0x00` will behave the same way regardless of the +/// register's value. bool AVRExpandPseudo::isLogicRegOpUndef(unsigned Op, unsigned ImmVal) const { // ANDI Rd, 0x00 clears all input bits. if (Op == AVR::ANDIRdK && ImmVal == 0x00) diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -286,11 +286,6 @@ "Expected power-of-2 shift amount"); if (VT.getSizeInBits() == 32) { - if (!isa<ConstantSDNode>(N->getOperand(1))) { - // 32-bit shifts are converted to a loop in IR. - // This should be unreachable. - report_fatal_error("Expected a constant shift amount!"); - } SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16); SDValue SrcLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0), @@ -298,25 +293,34 @@ SDValue SrcHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0), DAG.getConstant(1, dl, MVT::i16)); - uint64_t ShiftAmount = - cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); - if (ShiftAmount == 16) { - // Special case these two operations because they appear to be used by the - // generic codegen parts to lower 32-bit numbers. - // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit - // shift of a part of the 32-bit value? - switch (Op.getOpcode()) { - case ISD::SHL: { - SDValue Zero = DAG.getConstant(0, dl, MVT::i16); - return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo); - } - case ISD::SRL: { - SDValue Zero = DAG.getConstant(0, dl, MVT::i16); - return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero); - } + SDValue Cnt; + if (isa<ConstantSDNode>(N->getOperand(1))) { + // The amount to shift is known at compile time, so we can create an + // optimized sequence of instructions to shift this value. + uint64_t ShiftAmount = + cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + if (ShiftAmount == 16) { + // Special case these two operations because they appear to be used by + // the generic codegen parts to lower 32-bit numbers. + // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit + // shift of a part of the 32-bit value?
+ switch (Op.getOpcode()) { + case ISD::SHL: { + SDValue Zero = DAG.getConstant(0, dl, MVT::i16); + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo); + } + case ISD::SRL: { + SDValue Zero = DAG.getConstant(0, dl, MVT::i16); + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero); + } + } } + Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8); + } else { + // The shift is not known at compile time, so we have to emit this as a + // loop. + Cnt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(1)); } - SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8); unsigned Opc; switch (Op.getOpcode()) { default: @@ -1917,20 +1921,20 @@ // shifted. // For more information and background, see this blogpost: // https://aykevl.nl/2021/02/avr-bitshift -static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB, +static void insertMultibyteShift(MachineBasicBlock::iterator MBBI, + MachineBasicBlock *BB, const DebugLoc &DL, MutableArrayRef<std::pair<Register, int>> Regs, ISD::NodeType Opc, int64_t ShiftAmt) { const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); const AVRSubtarget &STI = BB->getParent()->getSubtarget<AVRSubtarget>(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const DebugLoc &dl = MI.getDebugLoc(); const bool ShiftLeft = Opc == ISD::SHL; const bool ArithmeticShift = Opc == ISD::SRA; // Zero a register, for use in later operations. Register ZeroReg = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg) + BuildMI(*BB, MBBI, DL, TII.get(AVR::COPY), ZeroReg) .addReg(STI.getZeroRegister()); // Do a shift modulo 6 or 7. This is a bit more complicated than most shifts @@ -1949,18 +1953,18 @@ // Shift one to the right, keeping the least significant bit as the carry // bit. - insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1); + insertMultibyteShift(MBBI, BB, DL, ShiftRegs, ISD::SRL, 1); // Rotate the least significant bit from the carry bit into a new register // (that starts out zero). Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg); + BuildMI(*BB, MBBI, DL, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg); // Shift one more to the right if this is a modulo-6 shift. if (ShiftAmt % 8 == 6) { - insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1); + insertMultibyteShift(MBBI, BB, DL, ShiftRegs, ISD::SRL, 1); Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte); + BuildMI(*BB, MBBI, DL, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte); LowByte = NewLowByte; } @@ -1988,7 +1992,7 @@ Regs.slice(0, ShiftRegsSize); // Shift one to the left. - insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1); + insertMultibyteShift(MBBI, BB, DL, ShiftRegs, ISD::SHL, 1); // Sign or zero extend the most significant register into a new register. // The HighByte is the byte that still has one (or two) bits from the @@ -1998,7 +2002,7 @@ Register ExtByte = 0; if (ArithmeticShift) { // Sign-extend bit that was shifted out last. - BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte) + BuildMI(*BB, MBBI, DL, TII.get(AVR::SBCRdRr), HighByte) .addReg(HighByte, RegState::Undef) .addReg(HighByte, RegState::Undef); ExtByte = HighByte; @@ -2008,17 +2012,17 @@ // Use the zero register for zero extending. ExtByte = ZeroReg; // Rotate most significant bit into a new register (that starts out zero).
- BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte) + BuildMI(*BB, MBBI, DL, TII.get(AVR::ADCRdRr), HighByte) .addReg(ExtByte) .addReg(ExtByte); } // Shift one more to the left for modulo 6 shifts. if (ShiftAmt % 8 == 6) { - insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1); + insertMultibyteShift(MBBI, BB, DL, ShiftRegs, ISD::SHL, 1); // Shift the topmost bit into the HighByte. Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt) + BuildMI(*BB, MBBI, DL, TII.get(AVR::ADCRdRr), NewExt) .addReg(HighByte) .addReg(HighByte); HighByte = NewExt; @@ -2063,10 +2067,10 @@ // Sign extend the most significant register into ShrExtendReg. ShrExtendReg = MRI.createVirtualRegister(&AVR::GPR8RegClass); Register Tmp = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Tmp) + BuildMI(*BB, MBBI, DL, TII.get(AVR::ADDRdRr), Tmp) .addReg(Regs[0].first, 0, Regs[0].second) .addReg(Regs[0].first, 0, Regs[0].second); - BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg) + BuildMI(*BB, MBBI, DL, TII.get(AVR::SBCRdRr), ShrExtendReg) .addReg(Tmp) .addReg(Tmp); } else { @@ -2112,22 +2116,22 @@ for (size_t I = 0; I < Regs.size(); I++) { size_t Idx = ShiftLeft ? I : Regs.size() - I - 1; Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg) + BuildMI(*BB, MBBI, DL, TII.get(AVR::SWAPRd), SwapReg) .addReg(Regs[Idx].first, 0, Regs[Idx].second); if (I != 0) { Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R) + BuildMI(*BB, MBBI, DL, TII.get(AVR::EORRdRr), R) .addReg(Prev) .addReg(SwapReg); Prev = R; } Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg) + BuildMI(*BB, MBBI, DL, TII.get(AVR::ANDIRdK), AndReg) .addReg(SwapReg) .addImm(ShiftLeft ? 0xf0 : 0x0f); if (I != 0) { Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R) + BuildMI(*BB, MBBI, DL, TII.get(AVR::EORRdRr), R) .addReg(Prev) .addReg(AndReg); size_t PrevIdx = ShiftLeft ? Idx - 1 : Idx + 1; @@ -2148,11 +2152,11 @@ Register In = Regs[I].first; Register InSubreg = Regs[I].second; if (I == (ssize_t)Regs.size() - 1) { // first iteration - BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out) + BuildMI(*BB, MBBI, DL, TII.get(AVR::ADDRdRr), Out) .addReg(In, 0, InSubreg) .addReg(In, 0, InSubreg); } else { - BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out) + BuildMI(*BB, MBBI, DL, TII.get(AVR::ADCRdRr), Out) .addReg(In, 0, InSubreg) .addReg(In, 0, InSubreg); } @@ -2168,9 +2172,10 @@ Register InSubreg = Regs[I].second; if (I == 0) { unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd; - BuildMI(*BB, MI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg); + BuildMI(*BB, MBBI, DL, TII.get(Opc), Out).addReg(In, 0, InSubreg); } else { - BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, 0, InSubreg); + BuildMI(*BB, MBBI, DL, TII.get(AVR::RORRd), Out) + .addReg(In, 0, InSubreg); } Regs[I] = std::pair(Out, 0); } @@ -2182,16 +2187,94 @@ } } +// Do a multibyte shift by shifting one bit at a time in a loop. It works very +// similarly to insertMultibyteShift in that it modifies the Regs array in-place +// (the output registers are stored in this array on return).
+static MachineBasicBlock *insertMultibyteShiftLoop( + MachineInstr &MI, MachineBasicBlock *BB, Register ShiftNum, + MutableArrayRef<std::pair<Register, int>> Regs, ISD::NodeType Opc) { + const DebugLoc &DL = MI.getDebugLoc(); + MachineFunction *MF = BB->getParent(); + const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + + MachineBasicBlock *EntryBB = BB; + MachineBasicBlock *CheckBB = MF->CreateMachineBasicBlock(BB->getBasicBlock()); + MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock(BB->getBasicBlock()); + + MF->push_back(CheckBB); + MF->push_back(LoopBB); + MachineBasicBlock *ExitBB = EntryBB->splitAt(MI, false); + + CheckBB->moveAfter(EntryBB); + LoopBB->moveAfter(CheckBB); + ExitBB->moveAfter(LoopBB); + + EntryBB->addSuccessor(CheckBB); + LoopBB->addSuccessor(CheckBB); + CheckBB->addSuccessor(LoopBB); + CheckBB->addSuccessor(ExitBB); + EntryBB->removeSuccessor(ExitBB); + + // Create virtual registers for the value phi nodes. + SmallVector<Register, 4> PhiRegs; + SmallVector<std::pair<Register, int>, 4> PhiRegPairs; + + for (size_t I = 0; I < Regs.size(); I++) { + Register Reg = MRI.createVirtualRegister(&AVR::GPR8RegClass); + PhiRegs.push_back(Reg); + PhiRegPairs.push_back(std::pair(Reg, 0)); + } + + // Shift the registers by one. + // + // Note that we build the blocks in a somewhat reversed order (in the final + // block layout, LoopBB comes *after* CheckBB), because in order to build + // CheckBB we need to know the PHI nodes from LoopBB. + insertMultibyteShift(LoopBB->end(), LoopBB, DL, PhiRegPairs, Opc, 1); + + // Jump back to the check at the top of the loop. + BuildMI(LoopBB, DL, TII.get(AVR::RJMPk)).addMBB(CheckBB); + + // Create PHI nodes for the value that is shifted. + for (size_t I = 0; I < Regs.size(); I++) { + auto Pair = Regs[I]; + + BuildMI(CheckBB, DL, TII.get(AVR::PHI), PhiRegs[I]) + .addReg(Pair.first, 0, Pair.second) + .addMBB(EntryBB) + .addReg(PhiRegPairs[I].first, 0, PhiRegPairs[I].second) + .addMBB(LoopBB); + + Regs[I] = std::pair(PhiRegs[I], 0); + } + + // Create a PHI node for the loop counter. + Register CntPhi = MRI.createVirtualRegister(&AVR::GPR8RegClass); + Register CntDec = MRI.createVirtualRegister(&AVR::GPR8RegClass); + + BuildMI(CheckBB, DL, TII.get(AVR::PHI), CntPhi) + .addReg(ShiftNum) + .addMBB(EntryBB) + .addReg(CntDec) + .addMBB(LoopBB); + + // Decrement the counter; if we're done, jump to the exit block, and + // otherwise fall through to the loop body (LoopBB). + BuildMI(CheckBB, DL, TII.get(AVR::DECRd), CntDec).addReg(CntPhi); + BuildMI(CheckBB, DL, TII.get(AVR::BRMIk)).addMBB(ExitBB); + + return ExitBB; +} + // Do a wide (32-bit) shift. MachineBasicBlock * AVRTargetLowering::insertWideShift(MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - const DebugLoc &dl = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); + MachineBasicBlock::iterator MBBI(&MI); - // How much to shift to the right (meaning: a negative number indicates a left - // shift). - int64_t ShiftAmt = MI.getOperand(4).getImm(); ISD::NodeType Opc; switch (MI.getOpcode()) { case AVR::Lsl32: @@ -2214,7 +2297,19 @@ }; // Do the shift. The registers are modified in-place. - insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt); + int64_t ShiftAmt = 1; + if (MI.getOperand(4).isImm()) { + // The shift amount is known at compile time. + ShiftAmt = MI.getOperand(4).getImm(); + insertMultibyteShift(MBBI, BB, MI.getDebugLoc(), Registers, Opc, ShiftAmt); + } else { + // The shift amount is not known at compile time. We need to create a loop.
+ Register ShiftNum = MI.getOperand(4).getReg(); + BB = insertMultibyteShiftLoop(MI, BB, ShiftNum, Registers, Opc); + + // Insert REG_SEQUENCE instructions at the beginning of ExitBB. + MBBI = BB->begin(); + } // Combine the 8-bit registers into 16-bit register pairs. // This done either from LSB to MSB or from MSB to LSB, depending on the @@ -2230,24 +2325,28 @@ if (Opc != ISD::SHL && (Opc != ISD::SRA || (ShiftAmt < 16 || ShiftAmt >= 22))) { // Use the resulting registers starting with the least significant byte. - BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg()) + BuildMI(*BB, MBBI, DL, TII.get(AVR::REG_SEQUENCE), + MI.getOperand(0).getReg()) .addReg(Registers[3].first, 0, Registers[3].second) .addImm(AVR::sub_lo) .addReg(Registers[2].first, 0, Registers[2].second) .addImm(AVR::sub_hi); - BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg()) + BuildMI(*BB, MBBI, DL, TII.get(AVR::REG_SEQUENCE), + MI.getOperand(1).getReg()) .addReg(Registers[1].first, 0, Registers[1].second) .addImm(AVR::sub_lo) .addReg(Registers[0].first, 0, Registers[0].second) .addImm(AVR::sub_hi); } else { // Use the resulting registers starting with the most significant byte. - BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg()) + BuildMI(*BB, MBBI, DL, TII.get(AVR::REG_SEQUENCE), + MI.getOperand(1).getReg()) .addReg(Registers[0].first, 0, Registers[0].second) .addImm(AVR::sub_hi) .addReg(Registers[1].first, 0, Registers[1].second) .addImm(AVR::sub_lo); - BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg()) + BuildMI(*BB, MBBI, DL, TII.get(AVR::REG_SEQUENCE), + MI.getOperand(0).getReg()) .addReg(Registers[2].first, 0, Registers[2].second) .addImm(AVR::sub_hi) .addReg(Registers[3].first, 0, Registers[3].second) diff --git a/llvm/lib/Target/AVR/AVRShiftExpand.cpp b/llvm/lib/Target/AVR/AVRShiftExpand.cpp deleted file mode 100644 --- a/llvm/lib/Target/AVR/AVRShiftExpand.cpp +++ /dev/null @@ -1,147 +0,0 @@ -//===- AVRShift.cpp - Shift Expansion Pass --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just -/// like avr-gcc. This must be done in IR because otherwise the type legalizer -/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3. 
-// -//===----------------------------------------------------------------------===// - -#include "AVR.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" - -using namespace llvm; - -namespace { - -class AVRShiftExpand : public FunctionPass { -public: - static char ID; - - AVRShiftExpand() : FunctionPass(ID) {} - - bool runOnFunction(Function &F) override; - - StringRef getPassName() const override { return "AVR Shift Expansion"; } - -private: - void expand(BinaryOperator *BI); -}; - -} // end of anonymous namespace - -char AVRShiftExpand::ID = 0; - -INITIALIZE_PASS(AVRShiftExpand, "avr-shift-expand", "AVR Shift Expansion", - false, false) - -Pass *llvm::createAVRShiftExpandPass() { return new AVRShiftExpand(); } - -bool AVRShiftExpand::runOnFunction(Function &F) { - SmallVector ShiftInsts; - auto &Ctx = F.getContext(); - for (Instruction &I : instructions(F)) { - if (!I.isShift()) - // Only expand shift instructions (shl, lshr, ashr). - continue; - if (I.getType() != Type::getInt32Ty(Ctx)) - // Only expand plain i32 types. - continue; - if (isa(I.getOperand(1))) - // Only expand when the shift amount is not known. - // Known shift amounts are (currently) better expanded inline. - continue; - ShiftInsts.push_back(cast(&I)); - } - - // The expanding itself needs to be done separately as expand() will remove - // these instructions. Removing instructions while iterating over a basic - // block is not a great idea. - for (auto *I : ShiftInsts) { - expand(I); - } - - // Return whether this function expanded any shift instructions. - return ShiftInsts.size() > 0; -} - -void AVRShiftExpand::expand(BinaryOperator *BI) { - auto &Ctx = BI->getContext(); - IRBuilder<> Builder(BI); - Type *Int32Ty = Type::getInt32Ty(Ctx); - Type *Int8Ty = Type::getInt8Ty(Ctx); - Value *Int8Zero = ConstantInt::get(Int8Ty, 0); - - // Split the current basic block at the point of the existing shift - // instruction and insert a new basic block for the loop. - BasicBlock *BB = BI->getParent(); - Function *F = BB->getParent(); - BasicBlock *EndBB = BB->splitBasicBlock(BI, "shift.done"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "shift.loop", F, EndBB); - - // Truncate the shift amount to i8, which is trivially lowered to a single - // AVR register. - Builder.SetInsertPoint(&BB->back()); - Value *ShiftAmount = Builder.CreateTrunc(BI->getOperand(1), Int8Ty); - - // Replace the unconditional branch that splitBasicBlock created with a - // conditional branch. - Value *Cmp1 = Builder.CreateICmpEQ(ShiftAmount, Int8Zero); - Builder.CreateCondBr(Cmp1, EndBB, LoopBB); - BB->back().eraseFromParent(); - - // Create the loop body starting with PHI nodes. - Builder.SetInsertPoint(LoopBB); - PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2); - ShiftAmountPHI->addIncoming(ShiftAmount, BB); - PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2); - ValuePHI->addIncoming(BI->getOperand(0), BB); - - // Subtract the shift amount by one, as we're shifting one this loop - // iteration. - Value *ShiftAmountSub = - Builder.CreateSub(ShiftAmountPHI, ConstantInt::get(Int8Ty, 1)); - ShiftAmountPHI->addIncoming(ShiftAmountSub, LoopBB); - - // Emit the actual shift instruction. The difference is that this shift - // instruction has a constant shift amount, which can be emitted inline - // without a library call. 
- Value *ValueShifted; - switch (BI->getOpcode()) { - case Instruction::Shl: - ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1)); - break; - case Instruction::LShr: - ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1)); - break; - case Instruction::AShr: - ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1)); - break; - default: - llvm_unreachable("asked to expand an instruction that is not a shift"); - } - ValuePHI->addIncoming(ValueShifted, LoopBB); - - // Branch to either the loop again (if there is more to shift) or to the - // basic block after the loop (if all bits are shifted). - Value *Cmp2 = Builder.CreateICmpEQ(ShiftAmountSub, Int8Zero); - Builder.CreateCondBr(Cmp2, EndBB, LoopBB); - - // Collect the resulting value. This is necessary in the IR but won't produce - // any actual instructions. - Builder.SetInsertPoint(BI); - PHINode *Result = Builder.CreatePHI(Int32Ty, 2); - Result->addIncoming(BI->getOperand(0), BB); - Result->addIncoming(ValueShifted, LoopBB); - - // Replace the original shift instruction. - BI->replaceAllUsesWith(Result); - BI->eraseFromParent(); -} diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp --- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp +++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp @@ -68,7 +68,6 @@ return getTM<AVRTargetMachine>(); } - void addIRPasses() override; bool addInstSelector() override; void addPreSched2() override; void addPreEmitPass() override; @@ -79,22 +78,12 @@ return new AVRPassConfig(*this, PM); } -void AVRPassConfig::addIRPasses() { - // Expand instructions like - // %result = shl i32 %n, %amount - // to a loop so that library calls are avoided. - addPass(createAVRShiftExpandPass()); - - TargetPassConfig::addIRPasses(); -} - extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() { // Register the target. RegisterTargetMachine<AVRTargetMachine> X(getTheAVRTarget()); auto &PR = *PassRegistry::getPassRegistry(); initializeAVRExpandPseudoPass(PR); - initializeAVRShiftExpandPass(PR); initializeAVRDAGToDAGISelPass(PR); } diff --git a/llvm/lib/Target/AVR/CMakeLists.txt b/llvm/lib/Target/AVR/CMakeLists.txt --- a/llvm/lib/Target/AVR/CMakeLists.txt +++ b/llvm/lib/Target/AVR/CMakeLists.txt @@ -23,7 +23,6 @@ AVRISelLowering.cpp AVRMCInstLower.cpp AVRRegisterInfo.cpp - AVRShiftExpand.cpp AVRSubtarget.cpp AVRTargetMachine.cpp AVRTargetObjectFile.cpp diff --git a/llvm/test/CodeGen/AVR/shift-expand.ll b/llvm/test/CodeGen/AVR/shift-expand.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AVR/shift-expand.ll +++ /dev/null @@ -1,89 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -avr-shift-expand -S %s -o - | FileCheck %s - -; The avr-shift-expand pass expands large shifts with a non-constant shift -; amount to a loop. These loops avoid generating a (non-existing) builtin such -; as __ashlsi3.
- -target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8" -target triple = "avr" - -define i32 @shl(i32 %value, i32 %amount) addrspace(1) { -; CHECK-LABEL: @shl( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] -; CHECK: shift.loop: -; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1 -; CHECK-NEXT: [[TMP6]] = shl i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] -; CHECK: shift.done: -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: ret i32 [[TMP8]] -; - %result = shl i32 %value, %amount - ret i32 %result -} - -define i32 @lshr(i32 %value, i32 %amount) addrspace(1) { -; CHECK-LABEL: @lshr( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] -; CHECK: shift.loop: -; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1 -; CHECK-NEXT: [[TMP6]] = lshr i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] -; CHECK: shift.done: -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: ret i32 [[TMP8]] -; - %result = lshr i32 %value, %amount - ret i32 %result -} - -define i32 @ashr(i32 %0, i32 %1) addrspace(1) { -; CHECK-LABEL: @ashr( -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1:%.*]] to i8 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] -; CHECK: shift.loop: -; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[TMP2:%.*]] ], [ [[TMP7:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP0:%.*]], [[TMP2]] ], [ [[TMP8:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP7]] = sub i8 [[TMP5]], 1 -; CHECK-NEXT: [[TMP8]] = ashr i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] -; CHECK: shift.done: -; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP0]], [[TMP2]] ], [ [[TMP8]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: ret i32 [[TMP10]] -; - %3 = ashr i32 %0, %1 - ret i32 %3 -} - -; This function is not modified because it is not an i32. -define i40 @shl40(i40 %value, i40 %amount) addrspace(1) { -; CHECK-LABEL: @shl40( -; CHECK-NEXT: [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]] -; CHECK-NEXT: ret i40 [[RESULT]] -; - %result = shl i40 %value, %amount - ret i40 %result -} - -; This function isn't either, although perhaps it should. 
-define i24 @shl24(i24 %value, i24 %amount) addrspace(1) { -; CHECK-LABEL: @shl24( -; CHECK-NEXT: [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]] -; CHECK-NEXT: ret i24 [[RESULT]] -; - %result = shl i24 %value, %amount - ret i24 %result -} diff --git a/llvm/test/CodeGen/AVR/shift-loop.ll b/llvm/test/CodeGen/AVR/shift-loop.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AVR/shift-loop.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc < %s -mtriple=avr -verify-machineinstrs -stop-after=dead-mi-elimination | FileCheck %s + +; This test shows the machine IR that is generated when lowering a shift +; operation to a loop. + +define i32 @shl_i32_n(i32 %a, i32 %b) #0 { + ; CHECK-LABEL: name: shl_i32_n + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $r23r22, $r25r24, $r19r18 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dregs = COPY $r19r18 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dregs = COPY $r25r24 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dregs = COPY $r23r22 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr8 = COPY [[COPY]].sub_lo + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr8 = PHI [[COPY1]].sub_hi, %bb.0, %15, %bb.2 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr8 = PHI [[COPY1]].sub_lo, %bb.0, %14, %bb.2 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gpr8 = PHI [[COPY2]].sub_hi, %bb.0, %13, %bb.2 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr8 = PHI [[COPY2]].sub_lo, %bb.0, %12, %bb.2 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gpr8 = PHI [[COPY3]], %bb.0, %17, %bb.2 + ; CHECK-NEXT: [[DECRd:%[0-9]+]]:gpr8 = DECRd [[PHI4]], implicit-def $sreg + ; CHECK-NEXT: BRMIk %bb.3, implicit $sreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDRdRr:%[0-9]+]]:gpr8 = ADDRdRr [[PHI3]], [[PHI3]], implicit-def $sreg + ; CHECK-NEXT: [[ADCRdRr:%[0-9]+]]:gpr8 = ADCRdRr [[PHI2]], [[PHI2]], implicit-def $sreg, implicit $sreg + ; CHECK-NEXT: [[ADCRdRr1:%[0-9]+]]:gpr8 = ADCRdRr [[PHI1]], [[PHI1]], implicit-def $sreg, implicit $sreg + ; CHECK-NEXT: [[ADCRdRr2:%[0-9]+]]:gpr8 = ADCRdRr [[PHI]], [[PHI]], implicit-def $sreg, implicit $sreg + ; CHECK-NEXT: RJMPk %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3 (%ir-block.0): + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:dregs = REG_SEQUENCE [[PHI]], %subreg.sub_hi, [[PHI1]], %subreg.sub_lo + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:dregs = REG_SEQUENCE [[PHI2]], %subreg.sub_hi, [[PHI3]], %subreg.sub_lo + ; CHECK-NEXT: $r23r22 = COPY [[REG_SEQUENCE1]] + ; CHECK-NEXT: $r25r24 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: RET implicit $r23r22, implicit $r25r24, implicit $r1 + %res = shl i32 %a, %b + ret i32 %res +} diff --git a/llvm/test/CodeGen/AVR/shift32.ll b/llvm/test/CodeGen/AVR/shift32.ll --- a/llvm/test/CodeGen/AVR/shift32.ll +++ b/llvm/test/CodeGen/AVR/shift32.ll @@ -1,6 +1,67 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=avr -mattr=movw -verify-machineinstrs | FileCheck %s +; Shift by a number unknown at compile time. +; The 'optsize' attribute is set to avoid duplicating part of the loop. +; TODO: it is more efficient to jump at the start and do the check where the +; 'rjmp' is now. The branch relaxation pass puts them in this non-optimal order.
+ +define i32 @shl_i32_n(i32 %a, i32 %b) #0 { +; CHECK-LABEL: shl_i32_n: +; CHECK: ; %bb.0: +; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: dec r18 +; CHECK-NEXT: brmi .LBB0_3 +; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: lsl r22 +; CHECK-NEXT: rol r23 +; CHECK-NEXT: rol r24 +; CHECK-NEXT: rol r25 +; CHECK-NEXT: rjmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: ret + %res = shl i32 %a, %b + ret i32 %res +} + +define i32 @lshr_i32_n(i32 %a, i32 %b) #0 { +; CHECK-LABEL: lshr_i32_n: +; CHECK: ; %bb.0: +; CHECK-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: dec r18 +; CHECK-NEXT: brmi .LBB1_3 +; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: lsr r25 +; CHECK-NEXT: ror r24 +; CHECK-NEXT: ror r23 +; CHECK-NEXT: ror r22 +; CHECK-NEXT: rjmp .LBB1_1 +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: ret + %res = lshr i32 %a, %b + ret i32 %res +} + +define i32 @ashr_i32_n(i32 %a, i32 %b) #0 { +; CHECK-LABEL: ashr_i32_n: +; CHECK: ; %bb.0: +; CHECK-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: dec r18 +; CHECK-NEXT: brmi .LBB2_3 +; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: asr r25 +; CHECK-NEXT: ror r24 +; CHECK-NEXT: ror r23 +; CHECK-NEXT: ror r22 +; CHECK-NEXT: rjmp .LBB2_1 +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: ret + %res = ashr i32 %a, %b + ret i32 %res +} + +; Shift by a constant known at compile time. + define i32 @shl_i32_1(i32 %a) { ; CHECK-LABEL: shl_i32_1: ; CHECK: ; %bb.0: @@ -575,3 +636,5 @@ %res = ashr i32 %a, 31 ret i32 %res } + +attributes #0 = { optsize } diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AVR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AVR/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/Target/AVR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AVR/BUILD.gn @@ -37,7 +37,6 @@ "AVRInstrInfo.cpp", "AVRMCInstLower.cpp", "AVRRegisterInfo.cpp", - "AVRShiftExpand.cpp", "AVRSubtarget.cpp", "AVRTargetMachine.cpp", "AVRTargetObjectFile.cpp",
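Illustrative note (not part of the diff above): the small sketch below, with made-up function and value names, shows the two lowering paths this change distinguishes. A shift whose amount is a compile-time constant is still expanded to the inline sequence in LowerShift/insertMultibyteShift, while a shift by a run-time amount no longer relies on the removed avr-shift-expand IR pass and is instead lowered to the CheckBB/LoopBB machine-level loop built by insertMultibyteShiftLoop. Both paths can be inspected with the same llc invocation the updated tests use (llc -mtriple=avr -mattr=movw -verify-machineinstrs).

; Hypothetical input, mirroring the new tests; names are illustrative only.
define i32 @const_amount(i32 %v) {
  %r = lshr i32 %v, 16        ; constant amount: inline (non-loop) expansion
  ret i32 %r
}

define i32 @runtime_amount(i32 %v, i32 %n) {
  %r = lshr i32 %v, %n        ; run-time amount: lowered to the shift loop
  ret i32 %r
}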