diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h --- a/llvm/lib/Target/AVR/AVR.h +++ b/llvm/lib/Target/AVR/AVR.h @@ -25,7 +25,6 @@ class FunctionPass; class PassRegistry; -Pass *createAVRShiftExpandPass(); FunctionPass *createAVRISelDag(AVRTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createAVRExpandPseudoPass(); @@ -34,7 +33,6 @@ void initializeAVRDAGToDAGISelPass(PassRegistry &); void initializeAVRExpandPseudoPass(PassRegistry &); -void initializeAVRShiftExpandPass(PassRegistry &); /// Contains the AVR backend. namespace AVR { diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -70,6 +70,7 @@ bool expandLogic(unsigned Op, Block &MBB, BlockIt MBBI); bool expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI); bool isLogicImmOpRedundant(unsigned Op, unsigned ImmVal) const; + bool isLogicRegOpUndef(unsigned Op, unsigned ImmVal) const; template bool expandAtomic(Block &MBB, BlockIt MBBI, Func f); @@ -214,7 +215,6 @@ bool AVRExpandPseudo::isLogicImmOpRedundant(unsigned Op, unsigned ImmVal) const { - // ANDI Rd, 0xff is redundant. if (Op == AVR::ANDIRdK && ImmVal == 0xff) return true; @@ -226,6 +226,18 @@ return false; } +bool AVRExpandPseudo::isLogicRegOpUndef(unsigned Op, unsigned ImmVal) const { + // ANDI Rd, 0x00 clears all input bits. + if (Op == AVR::ANDIRdK && ImmVal == 0x00) + return true; + + // ORI Rd, 0xff sets all input bits. 
+ if (Op == AVR::ORIRdK && ImmVal == 0xff) + return true; + + return false; +} + bool AVRExpandPseudo::expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; @@ -247,6 +259,10 @@ // SREG is always implicitly dead MIBLO->getOperand(3).setIsDead(); + + if (isLogicRegOpUndef(Op, Lo8)) { + MIBLO->getOperand(1).setIsUndef(true); + } } if (!isLogicImmOpRedundant(Op, Hi8)) { @@ -258,6 +274,10 @@ if (ImpIsDead) MIBHI->getOperand(3).setIsDead(); + + if (isLogicRegOpUndef(Op, Hi8)) { + MIBHI->getOperand(1).setIsUndef(true); + } } MI.eraseFromParent(); diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -286,11 +286,6 @@ "Expected power-of-2 shift amount"); if (VT.getSizeInBits() == 32) { - if (!isa(N->getOperand(1))) { - // 32-bit shifts are converted to a loop in IR. - // This should be unreachable. - report_fatal_error("Expected a constant shift amount!"); - } SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16); SDValue SrcLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0), @@ -298,25 +293,34 @@ SDValue SrcHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0), DAG.getConstant(1, dl, MVT::i16)); - uint64_t ShiftAmount = - cast(N->getOperand(1))->getZExtValue(); - if (ShiftAmount == 16) { - // Special case these two operations because they appear to be used by the - // generic codegen parts to lower 32-bit numbers. - // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit - // shift of a part of the 32-bit value? 
- switch (Op.getOpcode()) { - case ISD::SHL: { - SDValue Zero = DAG.getConstant(0, dl, MVT::i16); - return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo); - } - case ISD::SRL: { - SDValue Zero = DAG.getConstant(0, dl, MVT::i16); - return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero); - } + SDValue Cnt; + if (isa(N->getOperand(1))) { + // The amount to shift is known at compile time, so we can create an + // optimized sequence of instructions to shift this value. + uint64_t ShiftAmount = + cast(N->getOperand(1))->getZExtValue(); + if (ShiftAmount == 16) { + // Special case these two operations because they appear to be used by + // the generic codegen parts to lower 32-bit numbers. + // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit + // shift of a part of the 32-bit value? + switch (Op.getOpcode()) { + case ISD::SHL: { + SDValue Zero = DAG.getConstant(0, dl, MVT::i16); + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo); + } + case ISD::SRL: { + SDValue Zero = DAG.getConstant(0, dl, MVT::i16); + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero); + } + } } + Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8); + } else { + // The shift is not known at compile time, so we have to emit this as a + // loop. + Cnt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(1)); } - SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8); unsigned Opc; switch (Op.getOpcode()) { default: @@ -1893,20 +1897,20 @@ // shifted. 
// For more information and background, see this blogpost: // https://aykevl.nl/2021/02/avr-bitshift -static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB, +static void insertMultibyteShift(MachineBasicBlock::iterator MBBI, + MachineBasicBlock *BB, const DebugLoc &dl, MutableArrayRef> Regs, ISD::NodeType Opc, int64_t ShiftAmt) { const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); const AVRSubtarget &STI = BB->getParent()->getSubtarget(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const DebugLoc &dl = MI.getDebugLoc(); const bool ShiftLeft = Opc == ISD::SHL; const bool ArithmeticShift = Opc == ISD::SRA; // Zero a register, for use in later operations. Register ZeroReg = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg) + BuildMI(*BB, MBBI, dl, TII.get(AVR::COPY), ZeroReg) .addReg(STI.getZeroRegister()); // Do a shift modulo 6 or 7. This is a bit more complicated than most shifts @@ -1925,18 +1929,18 @@ // Shift one to the right, keeping the least significant bit as the carry // bit. - insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1); + insertMultibyteShift(MBBI, BB, dl, ShiftRegs, ISD::SRL, 1); // Rotate the least significant bit from the carry bit into a new register // (that starts out zero). Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg); + BuildMI(*BB, MBBI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg); // Shift one more to the right if this is a modulo-6 shift. 
if (ShiftAmt % 8 == 6) { - insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1); + insertMultibyteShift(MBBI, BB, dl, ShiftRegs, ISD::SRL, 1); Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte); + BuildMI(*BB, MBBI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte); LowByte = NewLowByte; } @@ -1964,7 +1968,7 @@ Regs.slice(0, ShiftRegsSize); // Shift one to the left. - insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1); + insertMultibyteShift(MBBI, BB, dl, ShiftRegs, ISD::SHL, 1); // Sign or zero extend the most significant register into a new register. // The HighByte is the byte that still has one (or two) bits from the @@ -1974,7 +1978,7 @@ Register ExtByte = 0; if (ArithmeticShift) { // Sign-extend bit that was shifted out last. - BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte) + BuildMI(*BB, MBBI, dl, TII.get(AVR::SBCRdRr), HighByte) .addReg(HighByte, RegState::Undef) .addReg(HighByte, RegState::Undef); ExtByte = HighByte; @@ -1984,17 +1988,17 @@ // Use the zero register for zero extending. ExtByte = ZeroReg; // Rotate most significant bit into a new register (that starts out zero). - BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte) + BuildMI(*BB, MBBI, dl, TII.get(AVR::ADCRdRr), HighByte) .addReg(ExtByte) .addReg(ExtByte); } // Shift one more to the left for modulo 6 shifts. if (ShiftAmt % 8 == 6) { - insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1); + insertMultibyteShift(MBBI, BB, dl, ShiftRegs, ISD::SHL, 1); // Shift the topmost bit into the HighByte. Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt) + BuildMI(*BB, MBBI, dl, TII.get(AVR::ADCRdRr), NewExt) .addReg(HighByte) .addReg(HighByte); HighByte = NewExt; @@ -2039,10 +2043,10 @@ // Sign extend the most significant register into ShrExtendReg. 
ShrExtendReg = MRI.createVirtualRegister(&AVR::GPR8RegClass); Register Tmp = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Tmp) + BuildMI(*BB, MBBI, dl, TII.get(AVR::ADDRdRr), Tmp) .addReg(Regs[0].first, 0, Regs[0].second) .addReg(Regs[0].first, 0, Regs[0].second); - BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg) + BuildMI(*BB, MBBI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg) .addReg(Tmp) .addReg(Tmp); } else { @@ -2088,22 +2092,22 @@ for (size_t I = 0; I < Regs.size(); I++) { size_t Idx = ShiftLeft ? I : Regs.size() - I - 1; Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg) + BuildMI(*BB, MBBI, dl, TII.get(AVR::SWAPRd), SwapReg) .addReg(Regs[Idx].first, 0, Regs[Idx].second); if (I != 0) { Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R) + BuildMI(*BB, MBBI, dl, TII.get(AVR::EORRdRr), R) .addReg(Prev) .addReg(SwapReg); Prev = R; } Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg) + BuildMI(*BB, MBBI, dl, TII.get(AVR::ANDIRdK), AndReg) .addReg(SwapReg) .addImm(ShiftLeft ? 0xf0 : 0x0f); if (I != 0) { Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass); - BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R) + BuildMI(*BB, MBBI, dl, TII.get(AVR::EORRdRr), R) .addReg(Prev) .addReg(AndReg); size_t PrevIdx = ShiftLeft ? 
Idx - 1 : Idx + 1; @@ -2124,11 +2128,11 @@ Register In = Regs[I].first; Register InSubreg = Regs[I].second; if (I == (ssize_t)Regs.size() - 1) { // first iteration - BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out) + BuildMI(*BB, MBBI, dl, TII.get(AVR::ADDRdRr), Out) .addReg(In, 0, InSubreg) .addReg(In, 0, InSubreg); } else { - BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out) + BuildMI(*BB, MBBI, dl, TII.get(AVR::ADCRdRr), Out) .addReg(In, 0, InSubreg) .addReg(In, 0, InSubreg); } @@ -2144,9 +2148,10 @@ Register InSubreg = Regs[I].second; if (I == 0) { unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd; - BuildMI(*BB, MI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg); + BuildMI(*BB, MBBI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg); } else { - BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, 0, InSubreg); + BuildMI(*BB, MBBI, dl, TII.get(AVR::RORRd), Out) + .addReg(In, 0, InSubreg); } Regs[I] = std::pair(Out, 0); } @@ -2158,16 +2163,99 @@ } } +// Do a multibyte shift by shifting one bit at a time in a loop. It works very +// similar to insertMultibyteShift in that it modifies the Regs array in-place +// (the output registers are stored in this array on return). +static MachineBasicBlock *insertMultibyteShiftLoop( + MachineInstr &MI, MachineBasicBlock *BB, Register ShiftNum, + MutableArrayRef> Regs, ISD::NodeType Opc) { + const DebugLoc &dl = MI.getDebugLoc(); + MachineFunction *MF = BB->getParent(); + const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + + // Create the necessary loop blocks in the right order. 
+ MachineBasicBlock *EntryBB = BB; + MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock(BB->getBasicBlock()); + MachineBasicBlock *CheckBB = MF->CreateMachineBasicBlock(BB->getBasicBlock()); + MF->push_back(LoopBB); + MF->push_back(CheckBB); + MachineBasicBlock *ExitBB = EntryBB->splitAt(MI, false); + if (EntryBB == ExitBB) { + // This can sometimes happen when the shift instruction is at the end of a + // block, and flow control falls through to the next block. + // But we do still need a separate block, so insert an (unnecessary) jump + // instruction here. + assert(EntryBB->canFallThrough() && "Expected a fallthrough block!"); + MachineBasicBlock *Fallthrough = EntryBB->getFallThrough(); + BuildMI(EntryBB, dl, TII.get(AVR::RJMPk)).addMBB(Fallthrough); + ExitBB = EntryBB->splitAt(MI, false); + } + assert((ExitBB != EntryBB) && "Expected the block to be split!"); + LoopBB->moveAfter(EntryBB); + CheckBB->moveAfter(LoopBB); + ExitBB->moveAfter(CheckBB); + + // Connect the blocks. + EntryBB->addSuccessor(CheckBB); + LoopBB->addSuccessor(CheckBB); + CheckBB->addSuccessor(LoopBB); + CheckBB->addSuccessor(ExitBB); + EntryBB->removeSuccessor(ExitBB); + + // Jump from the entry block into the loop header. + BuildMI(*EntryBB, MI, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB); + + // Create virtual registers for the value phi nodes. + SmallVector<Register> PhiRegs; + SmallVector<std::pair<Register, int>, 4> PhiRegPairs; + for (size_t I = 0; I < Regs.size(); I++) { + Register Reg = MRI.createVirtualRegister(&AVR::GPR8RegClass); + PhiRegs.push_back(Reg); + PhiRegPairs.push_back(std::pair(Reg, 0)); + } + + // Shift the registers by one. + insertMultibyteShift(LoopBB->end(), LoopBB, dl, PhiRegPairs, Opc, 1); + + // Create PHI nodes for the value that is shifted. 
+ for (size_t I = 0; I < Regs.size(); I++) { + auto Pair = Regs[I]; + BuildMI(CheckBB, dl, TII.get(AVR::PHI), PhiRegs[I]) + .addReg(Pair.first, 0, Pair.second) + .addMBB(EntryBB) + .addReg(PhiRegPairs[I].first, 0, PhiRegPairs[I].second) + .addMBB(LoopBB); + Regs[I] = std::pair(PhiRegs[I], 0); + } + + // Create a PHI node for the loop counter. + Register CntPhi = MRI.createVirtualRegister(&AVR::GPR8RegClass); + Register CntDec = MRI.createVirtualRegister(&AVR::GPR8RegClass); + BuildMI(CheckBB, dl, TII.get(AVR::PHI), CntPhi) + .addReg(ShiftNum) + .addMBB(EntryBB) + .addReg(CntDec) + .addMBB(LoopBB); + + // Decrement the counter. Jump to the loop body if we're not finished. Else, + // fall through to the next basic block. + BuildMI(CheckBB, dl, TII.get(AVR::DECRd), CntDec).addReg(CntPhi); + BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB); + + return ExitBB; +} + // Do a wide (32-bit) shift. MachineBasicBlock * AVRTargetLowering::insertWideShift(MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); const DebugLoc &dl = MI.getDebugLoc(); + MachineBasicBlock::iterator MBBI(&MI); // How much to shift to the right (meaning: a negative number indicates a left // shift). - int64_t ShiftAmt = MI.getOperand(4).getImm(); ISD::NodeType Opc; switch (MI.getOpcode()) { case AVR::Lsl32: @@ -2190,7 +2278,19 @@ }; // Do the shift. The registers are modified in-place. - insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt); + int64_t ShiftAmt = 1; + if (MI.getOperand(4).isImm()) { + // The shift amount is known at compile time. + ShiftAmt = MI.getOperand(4).getImm(); + insertMultibyteShift(MBBI, BB, MI.getDebugLoc(), Registers, Opc, ShiftAmt); + } else { + // The shift amount is not known at compile time. We need to create a loop. + Register ShiftNum = MI.getOperand(4).getReg(); + BB = insertMultibyteShiftLoop(MI, BB, ShiftNum, Registers, Opc); + + // Insert REG_SEQUENCE instructions at the beginning of ExitBB. 
+ MBBI = BB->begin(); + } // Combine the 8-bit registers into 16-bit register pairs. // This done either from LSB to MSB or from MSB to LSB, depending on the @@ -2206,24 +2306,28 @@ if (Opc != ISD::SHL && (Opc != ISD::SRA || (ShiftAmt < 16 || ShiftAmt >= 22))) { // Use the resulting registers starting with the least significant byte. - BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg()) + BuildMI(*BB, MBBI, dl, TII.get(AVR::REG_SEQUENCE), + MI.getOperand(0).getReg()) .addReg(Registers[3].first, 0, Registers[3].second) .addImm(AVR::sub_lo) .addReg(Registers[2].first, 0, Registers[2].second) .addImm(AVR::sub_hi); - BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg()) + BuildMI(*BB, MBBI, dl, TII.get(AVR::REG_SEQUENCE), + MI.getOperand(1).getReg()) .addReg(Registers[1].first, 0, Registers[1].second) .addImm(AVR::sub_lo) .addReg(Registers[0].first, 0, Registers[0].second) .addImm(AVR::sub_hi); } else { // Use the resulting registers starting with the most significant byte. - BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg()) + BuildMI(*BB, MBBI, dl, TII.get(AVR::REG_SEQUENCE), + MI.getOperand(1).getReg()) .addReg(Registers[0].first, 0, Registers[0].second) .addImm(AVR::sub_hi) .addReg(Registers[1].first, 0, Registers[1].second) .addImm(AVR::sub_lo); - BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg()) + BuildMI(*BB, MBBI, dl, TII.get(AVR::REG_SEQUENCE), + MI.getOperand(0).getReg()) .addReg(Registers[2].first, 0, Registers[2].second) .addImm(AVR::sub_hi) .addReg(Registers[3].first, 0, Registers[3].second) diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -58,16 +58,21 @@ TRI.splitReg(DestReg, DestLo, DestHi); TRI.splitReg(SrcReg, SrcLo, SrcHi); + // Emit the copies. 
+ // The original instruction was for a register pair, of which only one + // register might have been live. Add 'undef' to satisfy the machine + // verifier. + // TODO: eliminate these unnecessary copies. if (DestLo == SrcHi) { BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi) - .addReg(SrcHi, getKillRegState(KillSrc)); + .addReg(SrcHi, getKillRegState(KillSrc) | RegState::Undef); BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo) - .addReg(SrcLo, getKillRegState(KillSrc)); + .addReg(SrcLo, getKillRegState(KillSrc) | RegState::Undef); } else { BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo) - .addReg(SrcLo, getKillRegState(KillSrc)); + .addReg(SrcLo, getKillRegState(KillSrc) | RegState::Undef); BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi) - .addReg(SrcHi, getKillRegState(KillSrc)); + .addReg(SrcHi, getKillRegState(KillSrc) | RegState::Undef); } } } else { diff --git a/llvm/lib/Target/AVR/AVRShiftExpand.cpp b/llvm/lib/Target/AVR/AVRShiftExpand.cpp deleted file mode 100644 --- a/llvm/lib/Target/AVR/AVRShiftExpand.cpp +++ /dev/null @@ -1,147 +0,0 @@ -//===- AVRShift.cpp - Shift Expansion Pass --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just -/// like avr-gcc. This must be done in IR because otherwise the type legalizer -/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3. 
-// -//===----------------------------------------------------------------------===// - -#include "AVR.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" - -using namespace llvm; - -namespace { - -class AVRShiftExpand : public FunctionPass { -public: - static char ID; - - AVRShiftExpand() : FunctionPass(ID) {} - - bool runOnFunction(Function &F) override; - - StringRef getPassName() const override { return "AVR Shift Expansion"; } - -private: - void expand(BinaryOperator *BI); -}; - -} // end of anonymous namespace - -char AVRShiftExpand::ID = 0; - -INITIALIZE_PASS(AVRShiftExpand, "avr-shift-expand", "AVR Shift Expansion", - false, false) - -Pass *llvm::createAVRShiftExpandPass() { return new AVRShiftExpand(); } - -bool AVRShiftExpand::runOnFunction(Function &F) { - SmallVector ShiftInsts; - auto &Ctx = F.getContext(); - for (Instruction &I : instructions(F)) { - if (!I.isShift()) - // Only expand shift instructions (shl, lshr, ashr). - continue; - if (I.getType() != Type::getInt32Ty(Ctx)) - // Only expand plain i32 types. - continue; - if (isa(I.getOperand(1))) - // Only expand when the shift amount is not known. - // Known shift amounts are (currently) better expanded inline. - continue; - ShiftInsts.push_back(cast(&I)); - } - - // The expanding itself needs to be done separately as expand() will remove - // these instructions. Removing instructions while iterating over a basic - // block is not a great idea. - for (auto *I : ShiftInsts) { - expand(I); - } - - // Return whether this function expanded any shift instructions. - return ShiftInsts.size() > 0; -} - -void AVRShiftExpand::expand(BinaryOperator *BI) { - auto &Ctx = BI->getContext(); - IRBuilder<> Builder(BI); - Type *Int32Ty = Type::getInt32Ty(Ctx); - Type *Int8Ty = Type::getInt8Ty(Ctx); - Value *Int8Zero = ConstantInt::get(Int8Ty, 0); - - // Split the current basic block at the point of the existing shift - // instruction and insert a new basic block for the loop. 
- BasicBlock *BB = BI->getParent(); - Function *F = BB->getParent(); - BasicBlock *EndBB = BB->splitBasicBlock(BI, "shift.done"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "shift.loop", F, EndBB); - - // Truncate the shift amount to i8, which is trivially lowered to a single - // AVR register. - Builder.SetInsertPoint(&BB->back()); - Value *ShiftAmount = Builder.CreateTrunc(BI->getOperand(1), Int8Ty); - - // Replace the unconditional branch that splitBasicBlock created with a - // conditional branch. - Value *Cmp1 = Builder.CreateICmpEQ(ShiftAmount, Int8Zero); - Builder.CreateCondBr(Cmp1, EndBB, LoopBB); - BB->back().eraseFromParent(); - - // Create the loop body starting with PHI nodes. - Builder.SetInsertPoint(LoopBB); - PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2); - ShiftAmountPHI->addIncoming(ShiftAmount, BB); - PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2); - ValuePHI->addIncoming(BI->getOperand(0), BB); - - // Subtract the shift amount by one, as we're shifting one this loop - // iteration. - Value *ShiftAmountSub = - Builder.CreateSub(ShiftAmountPHI, ConstantInt::get(Int8Ty, 1)); - ShiftAmountPHI->addIncoming(ShiftAmountSub, LoopBB); - - // Emit the actual shift instruction. The difference is that this shift - // instruction has a constant shift amount, which can be emitted inline - // without a library call. 
- Value *ValueShifted; - switch (BI->getOpcode()) { - case Instruction::Shl: - ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1)); - break; - case Instruction::LShr: - ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1)); - break; - case Instruction::AShr: - ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1)); - break; - default: - llvm_unreachable("asked to expand an instruction that is not a shift"); - } - ValuePHI->addIncoming(ValueShifted, LoopBB); - - // Branch to either the loop again (if there is more to shift) or to the - // basic block after the loop (if all bits are shifted). - Value *Cmp2 = Builder.CreateICmpEQ(ShiftAmountSub, Int8Zero); - Builder.CreateCondBr(Cmp2, EndBB, LoopBB); - - // Collect the resulting value. This is necessary in the IR but won't produce - // any actual instructions. - Builder.SetInsertPoint(BI); - PHINode *Result = Builder.CreatePHI(Int32Ty, 2); - Result->addIncoming(BI->getOperand(0), BB); - Result->addIncoming(ValueShifted, LoopBB); - - // Replace the original shift instruction. - BI->replaceAllUsesWith(Result); - BI->eraseFromParent(); -} diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h --- a/llvm/lib/Target/AVR/AVRSubtarget.h +++ b/llvm/lib/Target/AVR/AVRSubtarget.h @@ -85,6 +85,8 @@ uint8_t getIORegisterOffset() const { return hasMemMappedGPR() ? 0x20 : 0x0; } + bool enableSubRegLiveness() const override { return true; } + /// Gets the ELF architecture for the e_flags field /// of an ELF object file. 
unsigned getELFArch() const { diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp --- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp +++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp @@ -68,7 +68,6 @@ return getTM(); } - void addIRPasses() override; bool addInstSelector() override; void addPreSched2() override; void addPreEmitPass() override; @@ -79,22 +78,12 @@ return new AVRPassConfig(*this, PM); } -void AVRPassConfig::addIRPasses() { - // Expand instructions like - // %result = shl i32 %n, %amount - // to a loop so that library calls are avoided. - addPass(createAVRShiftExpandPass()); - - TargetPassConfig::addIRPasses(); -} - extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() { // Register the target. RegisterTargetMachine X(getTheAVRTarget()); auto &PR = *PassRegistry::getPassRegistry(); initializeAVRExpandPseudoPass(PR); - initializeAVRShiftExpandPass(PR); initializeAVRDAGToDAGISelPass(PR); } diff --git a/llvm/lib/Target/AVR/CMakeLists.txt b/llvm/lib/Target/AVR/CMakeLists.txt --- a/llvm/lib/Target/AVR/CMakeLists.txt +++ b/llvm/lib/Target/AVR/CMakeLists.txt @@ -23,7 +23,6 @@ AVRISelLowering.cpp AVRMCInstLower.cpp AVRRegisterInfo.cpp - AVRShiftExpand.cpp AVRSubtarget.cpp AVRTargetMachine.cpp AVRTargetObjectFile.cpp diff --git a/llvm/test/CodeGen/AVR/hardware-mul.ll b/llvm/test/CodeGen/AVR/hardware-mul.ll --- a/llvm/test/CodeGen/AVR/hardware-mul.ll +++ b/llvm/test/CodeGen/AVR/hardware-mul.ll @@ -14,12 +14,12 @@ define i16 @mult16(i16 %a, i16 %b) { ; CHECK-LABEL: mult16: ; CHECK: muls r22, r25 -; CHECK: mov r20, r0 +; CHECK: mov r25, r0 ; CHECK: mul r22, r24 -; CHECK: mov r21, r0 +; CHECK: mov r20, r0 ; CHECK: mov r18, r1 ; CHECK: clr r1 -; CHECK: add r18, r20 +; CHECK: add r18, r25 ; CHECK: muls r23, r24 ; CHECK: clr r1 ; CHECK: add r18, r0 diff --git a/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll b/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll --- a/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll +++ 
b/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll @@ -227,14 +227,12 @@ ; CHECK-NEXT: mov r30, r22 ; CHECK-NEXT: mov r22, r24 ; CHECK-NEXT: mov r26, r22 -; CHECK-NEXT: mov r27, r23 ; CHECK-NEXT: ;APP ; CHECK-NEXT: mov r26, r26 ; CHECK-NEXT: add r26, r30 ; CHECK-NEXT: ;NO_APP -; CHECK-NEXT: mov r24, r26 -; CHECK-NEXT: ; kill: def $r22 killed $r22 killed $r23r22 ; CHECK-NEXT: mov r20, r30 +; CHECK-NEXT: mov r24, r26 ; CHECK-NEXT: rcall foo8 ; CHECK-NEXT: ret %3 = tail call i8 asm sideeffect "mov $0, $1\0Aadd $0, $2", "=e,e,e"(i8 %0, i8 %1) @@ -294,7 +292,6 @@ ; CHECK-NEXT: mov r24, r30 ; CHECK-NEXT: add r24, r26 ; CHECK-NEXT: ;NO_APP -; CHECK-NEXT: ; kill: def $r24 killed $r24 killed $r25r24 ; CHECK-NEXT: mov r22, r30 ; CHECK-NEXT: mov r20, r26 ; CHECK-NEXT: rcall foo8 @@ -345,9 +342,6 @@ ; CHECK-NEXT: ;NO_APP ; CHECK-NEXT: mov r24, r30 ; CHECK-NEXT: mov r25, r31 -; CHECK-NEXT: ; kill: def $r24 killed $r24 killed $r25r24 -; CHECK-NEXT: ; kill: def $r22 killed $r22 killed $r23r22 -; CHECK-NEXT: ; kill: def $r20 killed $r20 killed $r21r20 ; CHECK-NEXT: rcall foo8 ; CHECK-NEXT: pop r29 ; CHECK-NEXT: pop r28 diff --git a/llvm/test/CodeGen/AVR/shift-expand.ll b/llvm/test/CodeGen/AVR/shift-expand.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AVR/shift-expand.ll +++ /dev/null @@ -1,89 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -avr-shift-expand -S %s -o - | FileCheck %s - -; The avr-shift-expand pass expands large shifts with a non-constant shift -; amount to a loop. These loops avoid generating a (non-existing) builtin such -; as __ashlsi3. 
- -target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8" -target triple = "avr" - -define i32 @shl(i32 %value, i32 %amount) addrspace(1) { -; CHECK-LABEL: @shl( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] -; CHECK: shift.loop: -; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1 -; CHECK-NEXT: [[TMP6]] = shl i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] -; CHECK: shift.done: -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: ret i32 [[TMP8]] -; - %result = shl i32 %value, %amount - ret i32 %result -} - -define i32 @lshr(i32 %value, i32 %amount) addrspace(1) { -; CHECK-LABEL: @lshr( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] -; CHECK: shift.loop: -; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1 -; CHECK-NEXT: [[TMP6]] = lshr i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] -; CHECK: shift.done: -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: ret i32 [[TMP8]] -; - %result = lshr i32 %value, %amount - ret i32 %result -} - -define i32 @ashr(i32 %0, i32 %1) addrspace(1) { 
-; CHECK-LABEL: @ashr( -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1:%.*]] to i8 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] -; CHECK: shift.loop: -; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[TMP2:%.*]] ], [ [[TMP7:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP0:%.*]], [[TMP2]] ], [ [[TMP8:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP7]] = sub i8 [[TMP5]], 1 -; CHECK-NEXT: [[TMP8]] = ashr i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] -; CHECK: shift.done: -; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP0]], [[TMP2]] ], [ [[TMP8]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: ret i32 [[TMP10]] -; - %3 = ashr i32 %0, %1 - ret i32 %3 -} - -; This function is not modified because it is not an i32. -define i40 @shl40(i40 %value, i40 %amount) addrspace(1) { -; CHECK-LABEL: @shl40( -; CHECK-NEXT: [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]] -; CHECK-NEXT: ret i40 [[RESULT]] -; - %result = shl i40 %value, %amount - ret i40 %result -} - -; This function isn't either, although perhaps it should. -define i24 @shl24(i24 %value, i24 %amount) addrspace(1) { -; CHECK-LABEL: @shl24( -; CHECK-NEXT: [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]] -; CHECK-NEXT: ret i24 [[RESULT]] -; - %result = shl i24 %value, %amount - ret i24 %result -} diff --git a/llvm/test/CodeGen/AVR/shift-loop.ll b/llvm/test/CodeGen/AVR/shift-loop.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AVR/shift-loop.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc < %s -mtriple=avr -verify-machineinstrs -stop-after=dead-mi-elimination | FileCheck %s + +; This test shows the machine IR that is generated when lowering a shift +; operation to a loop. 
+ +define i32 @shl_i32_n(i32 %a, i32 %b) #0 { + ; CHECK-LABEL: name: shl_i32_n + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $r23r22, $r25r24, $r19r18 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dregs = COPY $r19r18 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dregs = COPY $r25r24 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dregs = COPY $r23r22 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr8 = COPY [[COPY]].sub_lo + ; CHECK-NEXT: RJMPk %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDRdRr:%[0-9]+]]:gpr8 = ADDRdRr %10, %10, implicit-def $sreg + ; CHECK-NEXT: [[ADCRdRr:%[0-9]+]]:gpr8 = ADCRdRr %9, %9, implicit-def $sreg, implicit $sreg + ; CHECK-NEXT: [[ADCRdRr1:%[0-9]+]]:gpr8 = ADCRdRr %8, %8, implicit-def $sreg, implicit $sreg + ; CHECK-NEXT: [[ADCRdRr2:%[0-9]+]]:gpr8 = ADCRdRr %7, %7, implicit-def $sreg, implicit $sreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr8 = PHI [[COPY1]].sub_hi, %bb.0, [[ADCRdRr2]], %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr8 = PHI [[COPY1]].sub_lo, %bb.0, [[ADCRdRr1]], %bb.1 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gpr8 = PHI [[COPY2]].sub_hi, %bb.0, [[ADCRdRr]], %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr8 = PHI [[COPY2]].sub_lo, %bb.0, [[ADDRdRr]], %bb.1 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gpr8 = PHI [[COPY3]], %bb.0, %17, %bb.1 + ; CHECK-NEXT: [[DECRd:%[0-9]+]]:gpr8 = DECRd [[PHI4]], implicit-def $sreg + ; CHECK-NEXT: BRPLk %bb.1, implicit $sreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3 (%ir-block.0): + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:dregs = REG_SEQUENCE [[PHI]], %subreg.sub_hi, [[PHI1]], %subreg.sub_lo + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:dregs = REG_SEQUENCE [[PHI2]], %subreg.sub_hi, [[PHI3]], %subreg.sub_lo + ; CHECK-NEXT: $r23r22 = COPY 
[[REG_SEQUENCE1]] + ; CHECK-NEXT: $r25r24 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: RET implicit $r23r22, implicit $r25r24, implicit $r1 + %res = shl i32 %a, %b + ret i32 %res +} diff --git a/llvm/test/CodeGen/AVR/shift32.ll b/llvm/test/CodeGen/AVR/shift32.ll --- a/llvm/test/CodeGen/AVR/shift32.ll +++ b/llvm/test/CodeGen/AVR/shift32.ll @@ -1,6 +1,67 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=avr -mattr=movw -verify-machineinstrs | FileCheck %s +; Shift by a number unknown at compile time. +; The 'optsize' attribute is set to avoid duplicating part of the loop. +; TODO: it is more efficient to jump at the start and do the check where the +; 'rjmp' is now. The branch relaxation pass puts them in this non-optimal order. + +define i32 @shl_i32_n(i32 %a, i32 %b) #0 { +; CHECK-LABEL: shl_i32_n: +; CHECK: ; %bb.0: +; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: dec r18 +; CHECK-NEXT: brmi .LBB0_3 +; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: lsl r22 +; CHECK-NEXT: rol r23 +; CHECK-NEXT: rol r24 +; CHECK-NEXT: rol r25 +; CHECK-NEXT: rjmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: ret + %res = shl i32 %a, %b + ret i32 %res +} + +define i32 @lshr_i32_n(i32 %a, i32 %b) #0 { +; CHECK-LABEL: lshr_i32_n: +; CHECK: ; %bb.0: +; CHECK-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: dec r18 +; CHECK-NEXT: brmi .LBB1_3 +; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: lsr r25 +; CHECK-NEXT: ror r24 +; CHECK-NEXT: ror r23 +; CHECK-NEXT: ror r22 +; CHECK-NEXT: rjmp .LBB1_1 +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: ret + %res = lshr i32 %a, %b + ret i32 %res +} + +define i32 @ashr_i32_n(i32 %a, i32 %b) #0 { +; CHECK-LABEL: ashr_i32_n: +; CHECK: ; %bb.0: +; CHECK-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: dec r18 +; CHECK-NEXT: brmi .LBB2_3 +; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: asr 
r25 +; CHECK-NEXT: ror r24 +; CHECK-NEXT: ror r23 +; CHECK-NEXT: ror r22 +; CHECK-NEXT: rjmp .LBB2_1 +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: ret + %res = ashr i32 %a, %b + ret i32 %res +} + +; Shift by a constant known at compile time. + define i32 @shl_i32_1(i32 %a) { ; CHECK-LABEL: shl_i32_1: ; CHECK: ; %bb.0: @@ -575,3 +636,5 @@ %res = ashr i32 %a, 31 ret i32 %res } + +attributes #0 = { optsize }