Index: llvm/lib/Target/ARC/ARC.h =================================================================== --- llvm/lib/Target/ARC/ARC.h +++ llvm/lib/Target/ARC/ARC.h @@ -19,13 +19,14 @@ namespace llvm { -class FunctionPass; +class ARCSubtarget; class ARCTargetMachine; +class FunctionPass; FunctionPass *createARCISelDag(ARCTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createARCExpandPseudosPass(); -FunctionPass *createARCOptAddrMode(); +FunctionPass *createARCOptAddrMode(const ARCSubtarget &); FunctionPass *createARCBranchFinalizePass(); } // end namespace llvm Index: llvm/lib/Target/ARC/ARCInstrInfo.td =================================================================== --- llvm/lib/Target/ARC/ARCInstrInfo.td +++ llvm/lib/Target/ARC/ARCInstrInfo.td @@ -133,6 +133,9 @@ "STB_FAR $dst, $addr", [(truncstorei8 GPR32:$dst, AddrModeFar:$addr)]>; +// To be deleted opcode +def TBD : PseudoInstARC<(outs),(ins),"TO_BE_DELETED",[]>; + // TODO: Add `Requires<[HasBitScan]>` predicate to these when available. let Defs = [STATUS32] in { def CTLZ : PseudoInstARC<(outs GPR32:$A), @@ -289,6 +292,9 @@ // Definitions for 3 operand binary instructions. defm ADD : ArcBinaryGEN4Inst<0b000000, "add",1>; +defm ADD1 : ArcBinaryGEN4Inst<0b010100, "add1">; +defm ADD2 : ArcBinaryGEN4Inst<0b010101, "add2">; +defm ADD3 : ArcBinaryGEN4Inst<0b010110, "add3">; defm SUB : ArcBinaryGEN4Inst<0b000010, "sub">; defm SUB1 : ArcBinaryGEN4Inst<0b010111, "sub1">; defm SUB2 : ArcBinaryGEN4Inst<0b011000, "sub2">; Index: llvm/lib/Target/ARC/ARCOptAddrMode.cpp =================================================================== --- llvm/lib/Target/ARC/ARCOptAddrMode.cpp +++ llvm/lib/Target/ARC/ARCOptAddrMode.cpp @@ -15,11 +15,14 @@ #define GET_INSTRMAP_INFO #include "ARCInstrInfo.h" #include "ARCTargetMachine.h" +#include "ARCUtil.h" +#include "MCTargetDesc/ARCMCUtil.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" @@ -34,6 +37,13 @@ #define DEBUG_TYPE "arc-addr-mode" namespace llvm { +#if 0 +} fix emacs; +#endif + +// Compute best -Os/-Os1 settings +static cl::opt ArcAbawMaxSpace("arc-abaw-max-space", cl::init(16), + cl::ReallyHidden, cl::ZeroOrMore); static cl::opt ArcKillAddrMode("arc-kill-addr-mode", cl::init(0), cl::ReallyHidden, cl::ZeroOrMore); @@ -43,9 +53,257 @@ #define VIEW_BEFORE() ((ArcKillAddrMode & 0x0004) != 0) #define VIEW_AFTER() ((ArcKillAddrMode & 0x0008) != 0) #define KILL_PASS() ((ArcKillAddrMode & 0x0010) != 0) +#define SINK_LDST() ((ArcKillAddrMode & 0x00000020) != 0) +#define GEN_PSEUDO_INC() ((ArcKillAddrMode & 0x00000040) != 0) +#define KILL_VDSP_VVLD() ((ArcKillAddrMode & 0x00000080) != 0) -FunctionPass *createARCOptAddrMode(); void initializeARCOptAddrModePass(PassRegistry &); + +// TODO port +#define mayHaveShortForm(x) false + +namespace ARC { +#if 0 +} fix emacs; +#endif + +// TODO +bool isVectorInstr(long long) { return false; } + +// Structure to represent register increment +// Reg - increment value +// Scale - scale, as present in inctructions like ADD2, LD_rras etc +struct RegIncrement { + unsigned Reg; + unsigned Scale; +}; + +// Structure to represent Increment/Offset of instruction. 
Can be immediate or +// (scaled) register +struct BaseIncr { + bool IsImm = false; + union { + RegIncrement RI; + int64_t LI = 0; + } u; + BaseIncr() : IsImm(false) { u.LI = 0; } + BaseIncr(int64_t ImmOff) : IsImm(true) { u.LI = ImmOff; } + BaseIncr(unsigned Reg, unsigned Scale) : IsImm(false) { u.RI = {Reg, Scale}; } + + BaseIncr(const BaseIncr &other) { + IsImm = other.IsImm; + u = other.u; + } + + bool operator==(const BaseIncr &other) { + if (IsImm != other.IsImm) + return false; + if (IsImm) + return u.LI == other.u.LI; + else + return (u.RI.Reg = other.u.RI.Reg && u.RI.Scale == other.u.RI.Scale); + } + bool operator!=(const BaseIncr &other) { return !operator==(other); } + + int64_t getImm() const { + assert(IsImm && "not an immediate"); + return u.LI; + } + unsigned getReg() const { + assert(!IsImm && "not a reg incr"); + return u.RI.Reg; + } + unsigned getScale() const { + assert(!IsImm && "not a reg incr"); + return u.RI.Scale; + } + + void setImm(int64_t Imm) { + assert(IsImm && "not an immediate"); + u.LI = Imm; + } + void setReg(unsigned Reg) { + assert(!IsImm && "not a reg incr"); + u.RI.Reg = Reg; + } + void setScale(unsigned Scale) { + assert(!IsImm && "not a reg incr"); + u.RI.Scale = Scale; + } + + RegIncrement &getRegIncrement() { return u.RI; } + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { + OS << "Offset: "; + if (IsImm) + OS << getImm(); + else + OS << printReg(getReg(), TRI) << " * " << getScale(); + } +}; + +class ABAW : public ARC::SsaInstructionVisitor { + + typedef SmallVectorImpl InstrVector; + typedef std::pair InstIncrPair; + + MachineDominatorTree &DOM; + const MachineLoopInfo &MLI; + bool IsOptimizeForSpace = false; + bool InSWPCandidate = false; + + // Map of tied registers with non-immediate increments + // E.g., generating + // %vreg104, %vreg100 = LDD_rr_ab %vreg99, %vreg32 + // will add to this map: + // tiedRegisterMap[%vreg99] = {%vreg100, %vreg32, LDD_rr_ab}; + // We use it to perform kind of copy propagation for tied registers. + // See STAR 9001160762 for example where it could be useful + struct TiedRegIncrement { + unsigned newReg; + unsigned incReg; + MachineInstr *def; + }; + std::map tiedRegisterMap; + + // Instruction ordinals within BB. + // Used for quick lexical ordering of MIs within BB to avoid + // quadratic behaviour of dominates() + std::map Ordinals; + + // Vectors of candidate load/stores and increments + SmallVector Candidates, Increments; // TODO: MapVector maybe? + + // Vector of uses of base register outside current BB in a block dominated by + // BB + // TODO: they really can be made InstIncrPair. If we cannot recognize it, we + // cannot fix it anyway + SmallVector ExternalDominatedUses; + + // Vector of uses of base register outside current BB in a block that + // dominates BB + SmallVector ExternalDominatingUses; + + // If true, generate preincrement form instead of postincrement + bool GeneratePreInc = false; + + MachineInstr *CurrentInst = nullptr; + +public: + ABAW(const ARCSubtarget &ST, MachineFunction &MF, MachineDominatorTree &DOM, + const MachineLoopInfo &MLI) + : SsaInstructionVisitor(ST, MF), DOM(DOM), MLI(MLI) { + IsOptimizeForSpace = ARC::isOptimizeForSpace(MF); + } + + bool visit(MachineInstr &) override; + void preBlockCallout(MachineBasicBlock &) override; + void postBlockCallout(MachineBasicBlock &, bool) override; + +private: + // Check if either one instruction can be moved (up or down) to another. 
+ // Returns instruction which another can be moved to (i.e., one not moved) or + // nullptr if nothing can be moved + MachineInstr *canJoinInstructions(MachineInstr *ldst, MachineInstr *add); + + // Checks that there are no uses of add in interval (add, ldst) + bool noUseOfAddBeforeLoadOrStore(MachineInstr *add, MachineInstr *ldst); + + // Update instruction operands accorging to pre/post-increment form. + // MI - instruction to update + // NewBaseReg - register for new base + // BaseReg - base register + // NewOffset - offset + void setLoadStoreBaseOffset(MachineInstr &MI, unsigned NewBaseReg, + unsigned BaseReg, + const MachineOperand &NewOffset); + + void setLoadStoreBaseOffset(MachineInstr &MI, unsigned NewBaseReg, + unsigned BaseReg, int64_t NewOffset); + + // Is "MI" the result of adding a constant or register to itself? + // If so, make the first source operand reference the PHI + bool isSelfIncrementing(MachineInstr &MI) const; + + // Check if load/store instruction 'Ldst' can be hoisted up to instruction + // 'To' + bool canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To); + + // Check if load/store instruction 'Ldst' can be sunk down to instruction 'To' + bool canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To); + + // Return true if all instructions in 'Uses' can be updated to accomodate + // BaseReg's increment by instruction Incr + bool canFixPastUses(const InstrVector &Uses, MachineInstr *Incr, + unsigned BaseReg); + + // Adjust all uses of 'base' after 'ldst' to accomodate base increment by + // newOffset + void fixIntermediates(MachineInstr *ldst, unsigned newBase, int64_t newOffset, + unsigned oldBase); + + // As above, but increment is not a literal but register 'Addend' scaled by + // 'Scale' + void fixIntermediatesReg(MachineInstr *ldst, unsigned newBase, + unsigned Addend, unsigned Scale, unsigned Base); + + // Try to combine load/store instruction 'Ldst' with base register increment + bool tryToCombine(MachineInstr &Ldst); + + bool hasOnePhiUse(const MachineOperand &opd) const; + bool transformPhiForBetterLICM(MachineInstr *Phi, MachineInstr *Ldst, + unsigned BaseIdx, unsigned OffIdx); + + void clear(); + + // Collect instruction in MBB using BaseReg as a base register + // Return true is something has been found + bool collectCandidates(unsigned BaseReg, MachineBasicBlock *MBB); + + // Check candidates to see if address postincrement can be applied + bool analyzeCandidates(unsigned BaseReg, MachineBasicBlock *MBB); + + // Analysis specific for instructions with immediate offsets + bool analyzeCandidatesImm(unsigned BaseReg, MachineBasicBlock *MBB); + + // Analysis specific for instructions with register offsets + bool analyzeCandidatesReg(unsigned BaseReg, MachineBasicBlock *MBB); + + // Verify all "Candidates" have increment forms + bool validateIncrementForms() const; + + // Apply transformation to eligible instructions + bool transformCandidates(unsigned BaseReg, MachineBasicBlock *MBB); + + // In case base address if loop IV, try to transform is so that + // first load/store instruction has zero offset + bool tryToTransformPHI(unsigned BaseReg, MachineBasicBlock *MBB); + + // Try to reorder loads/stores so that their offsets are more + // amenable to transformation + bool tryToReorderCandidatesImm(); + + // See if address preincrement can be generated instead of postincrement + // Somtimes it can be simpler transformation + bool tryToGenPreIncImm(); + + // Check if candidate instructions can be changed to postincrement form + bool 
checkCandidatesImm(); + + unsigned getConversionToAW(MachineInstr &MI) const; + unsigned getConversionToAB(MachineInstr &MI) const; + unsigned getVDSPAVVariant(unsigned ABOpcode) const; + + void computeOrdinals(const MachineBasicBlock &MBB) { + Ordinals.clear(); + unsigned N = 1; + for (auto &I : MBB) + Ordinals[&I] = N++; + } +}; + +} // end namespace ARC + } // end namespace llvm namespace { @@ -53,13 +311,19 @@ public: static char ID; + const ARCSubtarget *ST; + ARCOptAddrMode() : MachineFunctionPass(ID) {} + ARCOptAddrMode(const ARCSubtarget &ST) : MachineFunctionPass(ID), ST(&ST) {} + StringRef getPassName() const override { return OPTADDRMODE_DESC; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); } @@ -67,51 +331,6 @@ bool runOnMachineFunction(MachineFunction &MF) override; private: - const ARCSubtarget *AST = nullptr; - const ARCInstrInfo *AII = nullptr; - MachineRegisterInfo *MRI = nullptr; - MachineDominatorTree *MDT = nullptr; - - // Tries to combine \p Ldst with increment of its base register to form - // single post-increment instruction. - MachineInstr *tryToCombine(MachineInstr &Ldst); - - // Returns true if result of \p Add is not used before \p Ldst - bool noUseOfAddBeforeLoadOrStore(const MachineInstr *Add, - const MachineInstr *Ldst); - - // Returns true if load/store instruction \p Ldst can be hoisted up to - // instruction \p To - bool canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To); - - // // Returns true if load/store instruction \p Ldst can be sunk down - // // to instruction \p To - // bool canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To); - - // Check if instructions \p Ldst and \p Add can be moved to become adjacent - // If they can return instruction which need not to move. - // If \p Uses is not null, fill it with instructions after \p Ldst which use - // \p Ldst's base register - MachineInstr *canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add, - SmallVectorImpl *Uses); - - // Returns true if all instruction in \p Uses array can be adjusted - // to accomodate increment of register \p BaseReg by \p Incr - bool canFixPastUses(const ArrayRef &Uses, - MachineOperand &Incr, unsigned BaseReg); - - // Update all instructions in \p Uses to accomodate increment - // of \p BaseReg by \p Offset - void fixPastUses(ArrayRef Uses, unsigned BaseReg, - int64_t Offset); - - // Change instruction \p Ldst to postincrement form. 
- // \p NewBase is register to hold update base value - // \p NewOffset is instruction's new offset - void changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode, - unsigned NewBase, MachineOperand &NewOffset); - - bool processBasicBlock(MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -119,285 +338,1002 @@ char ARCOptAddrMode::ID = 0; INITIALIZE_PASS_BEGIN(ARCOptAddrMode, OPTADDRMODE_NAME, OPTADDRMODE_DESC, false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(ARCOptAddrMode, OPTADDRMODE_NAME, OPTADDRMODE_DESC, false, false) -// Return true if \p Off can be used as immediate offset -// operand of load/store instruction (S9 literal) -static bool isValidLoadStoreOffset(int64_t Off) { return isInt<9>(Off); } +bool ARCOptAddrMode::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction()) || KILL_PASS()) + return false; + + if (DUMP_BEFORE()) + MF.dump(); + if (VIEW_BEFORE()) + MF.viewCFG(); -// Return true if \p Off can be used as immediate operand of -// ADD/SUB instruction (U6 literal) -static bool isValidIncrementOffset(int64_t Off) { return isUInt<6>(Off); } + LLVM_DEBUG(dbgs() << ">>>Starting ARCOptAddrMode pass for " + << MF.getFunction().getName() << "\n"); + bool Changed = ARC::ABAW(*ST, MF, getAnalysis(), + getAnalysis()) + .examineEachInstr(); + LLVM_DEBUG(dbgs() << ">>>Ended ARCOptAddrMode pass for " + << MF.getFunction().getName() << " with result " << Changed + << "\n"); -static bool isAddConstantOp(const MachineInstr &MI, int64_t &Amount) { - int64_t Sign = 1; - switch (MI.getOpcode()) { - case ARC::SUB_rru6: - Sign = -1; - LLVM_FALLTHROUGH; - case ARC::ADD_rru6: - assert(MI.getOperand(2).isImm() && "Expected immediate operand"); - Amount = Sign * MI.getOperand(2).getImm(); - return true; - default: - return false; + if (DUMP_AFTER()) + MF.dump(); + if (VIEW_AFTER()) + MF.viewCFG(); + return Changed; +} + +// Returns TRUE if opd has exactly one use by a PHI instruction +bool ARC::ABAW::hasOnePhiUse(const MachineOperand &opd) const { + int count = 0; + for (MachineInstr &I : MRI.use_nodbg_instructions(opd.getReg())) { + if (I.getOpcode() != ARC::PHI) + return false; + ++count; } + return count == 1; } -// Return true if \p MI dominates of uses of virtual register \p VReg -static bool dominatesAllUsesOf(const MachineInstr *MI, unsigned VReg, - MachineDominatorTree *MDT, - MachineRegisterInfo *MRI) { +// Try to change +// BB#1: +// %r0 = ... +// +// BB#2: +// %r1 = PHI %r0, , %r2, +// %r3 = LD_rs9 %r1, off +// %r2 = ADD_rru6 %r1, inc +// +// to +// +// BB#1: +// %r0 = ... 
+// %r4 = %r0 + off +// +// BB#2: +// %r1 = PHI %r4, , %r2, +// %r3 = LD_rs9 %r1, 0 +// %r2 = ADD_rru6 %r1, inc +// +bool ARC::ABAW::tryToTransformPHI(unsigned BaseReg, MachineBasicBlock *MBB) { + // See if we can change first load/store to have zero offset + LLVM_DEBUG(dbgs() << "\toffset of first mem instr is non-zero - check if it " + "can be zeroed\n"); + + if (!ExternalDominatedUses.empty()) { + return false; // TODO: it may be possible to adjust them + } - assert(Register::isVirtualRegister(VReg) && "Expected virtual register!"); + // Our base address must be simple loop local variable (not leaking outside) + MachineInstr *PHI = getSsaDef(BaseReg); + if (!PHI || PHI->getParent() != MBB || !PHI->isPHI()) { + LLVM_DEBUG( + dbgs() << "\tbase reg def is not PHI or not in single-block loop\n"); + return false; + } - for (auto it = MRI->use_nodbg_begin(VReg), end = MRI->use_nodbg_end(); - it != end; ++it) { - MachineInstr *User = it->getParent(); - if (User->isPHI()) { - unsigned BBOperandIdx = User->getOperandNo(&*it) + 1; - MachineBasicBlock *MBB = User->getOperand(BBOperandIdx).getMBB(); - if (MBB->empty()) { - const MachineBasicBlock *InstBB = MI->getParent(); - assert(InstBB != MBB && "Instruction found in empty MBB"); - if (!MDT->dominates(InstBB, MBB)) - return false; + if (PHI->getNumOperands() != 5) { + LLVM_DEBUG(dbgs() << "\twrong number of PHI's operands\n"); + return false; + } + + MachineInstr *Incr = Increments[0].first; + if (!hasOnePhiUse(Incr->getOperand(0))) { + LLVM_DEBUG(dbgs() << "\tincrement has more than single PHI use\n"); + return false; + } + if (Incr != getSsaDef(PHI->getOperand(3).getReg())) { + return false; + } + + unsigned BaseIdx = (Candidates[0].first->mayLoad()) ? 1 : 0; + unsigned OffIdx = BaseIdx + 1; + + if (!Candidates[0].second.IsImm) { + // TODO: fix transformPhiForBetterLICM to handle multiple insns + if (Candidates.size() > 1) + return false; + + bool IncrIsImm = Increments[0].second.IsImm; + for (auto &C : Candidates) { + if (C.first == Incr) continue; - } - User = &*MBB->rbegin(); + if (C.second.IsImm) + return false; + if (IncrIsImm && (C.second.getScale() - 1) != 0) + return false; // No point doing this transformation + } + bool baseOffsetSwapped = false; + if (ARC::getVReg(Candidates[0].first->getOperand(BaseIdx)) != BaseReg) { + assert(ARC::getVReg(Candidates[0].first->getOperand(OffIdx)) == BaseReg); + std::swap(BaseIdx, OffIdx); + baseOffsetSwapped = true; + } + + // Def of offset must dominate incoming block + MachineBasicBlock *IncomingMBB = PHI->getOperand(2).getMBB(); + MachineInstr *OffDef = getSsaDef(Candidates[0].first->getOperand(OffIdx)); + if (OffDef == nullptr) + return false; + if (!DOM.dominates(OffDef->getParent(), IncomingMBB)) { + return false; } - if (!MDT->dominates(MI, User)) + if (!transformPhiForBetterLICM(PHI, Candidates[0].first, BaseIdx, OffIdx)) return false; + + if (baseOffsetSwapped) { + // Avoid nonsense like "ldub %r0,[0,%1]" + + MachineInstr *Ldst = Candidates[0].first; + if (OffIdx + 1 == BaseIdx) { // Should always be true + MachineOperand base = Ldst->getOperand(BaseIdx); + MachineOperand off = Ldst->getOperand(OffIdx); + if (base.isReg() && off.isImm()) { + Ldst->getOperand(OffIdx).ChangeToRegister(base.getReg(), false); + Ldst->getOperand(OffIdx).setSubReg(base.getSubReg()); + Ldst->getOperand(BaseIdx).ChangeToImmediate(off.getImm()); + } + } + } + + for (auto &C : Candidates) { + if (C.first == Incr) + continue; + unsigned NewScale = C.second.getScale() - 1; + if (NewScale) { + 
C.second.setScale(NewScale); + } else { + C.second.IsImm = true; + C.second.setImm(0); + } + } + LLVM_DEBUG(dbgs() << "Transformed Candidates:\n"; for (auto &C + : Candidates) { + dbgs() << Ordinals[C.first] << ": " << *C.first << "\t"; + C.second.print(dbgs(), TRI); + dbgs() << "\n"; + }); + return true; } + + // Immediate offset case + + int64_t PrevOffset = Candidates[0].second.getImm(); + + if (!transformPhiForBetterLICM(PHI, Candidates[0].first, BaseIdx, OffIdx)) + return false; + + for (auto &C : Candidates) { + if (C.first == Incr) + continue; + int64_t NewOffset = C.second.getImm() - PrevOffset; + C.second.setImm(NewOffset); + } + LLVM_DEBUG(dbgs() << "Transformed Candidates:\n"; for (auto &C + : Candidates) { + dbgs() << Ordinals[C.first] << ": " << *C.first << "\t"; + C.second.print(dbgs(), TRI); + dbgs() << "\n"; + }); return true; } -// Return true if \p MI is load/store instruction with immediate offset -// which can be adjusted by \p Disp -static bool isLoadStoreThatCanHandleDisplacement(const TargetInstrInfo *TII, - const MachineInstr &MI, - int64_t Disp) { - unsigned BasePos, OffPos; - if (!TII->getBaseAndOffsetPosition(MI, BasePos, OffPos)) - return false; - const MachineOperand &MO = MI.getOperand(OffPos); - if (!MO.isImm()) +// (STAR 9000862576) ISel scheduler loves to swap loads from same BaseReg and +// put one with bigger offset first: +// %vreg17 = LD_rs9 %vreg181, 68; +// %vreg20 = LD_rs9 %vreg181, 4; +// %vreg27 = LD_rs9 %vreg181, 260; +// %vreg50 = LD_rs9 %vreg181, 324; +// ... +// We can generate postincrement only if two first loads may be swapped +// It usually happens with first two loads, hence this simple ad hoc +// implementation + +bool ARC::ABAW::tryToReorderCandidatesImm() { + bool Changed = false; + bool Ascending = (Increments[0].second.getImm() > 0); + size_t N = Candidates.size(); + MachineBasicBlock *MBB = Candidates[0].first->getParent(); + // We don't want to move increments, hence i < N-1 bound. + for (size_t i = 1; i < N - 1; ++i) { + MachineInstr *PrevMI = Candidates[i - 1].first; + MachineInstr *CurrMI = Candidates[i].first; + assert(PrevMI->getParent() == MBB); + assert(CurrMI->getParent() == MBB); + int64_t PrevOff = Candidates[i - 1].second.getImm(); + int64_t CurrOff = Candidates[i].second.getImm(); + if ((Ascending && (CurrOff >= PrevOff)) || + (!Ascending && (CurrOff <= PrevOff))) + continue; + LLVM_DEBUG(dbgs() << "Found misplaced instructions in %bb." 
+ << MBB->getNumber() << ":\n" + << *PrevMI << *CurrMI); + MachineBasicBlock::iterator it1(CurrMI); + ++it1; + MachineBasicBlock::iterator it2(PrevMI); + if (it1 != MBB->end() && canSinkLoadStoreTo(PrevMI, &*it1)) { + if (CurrentInst && CurrentInst->getNextNode() == PrevMI) { + LLVM_DEBUG( + dbgs() << "Cannot move instruction - would break iterators\n"); + return false; + } + if (PrevMI != &*it1) { + PrevMI->removeFromParent(); + MBB->insert(it1, PrevMI); + std::swap(Candidates[i], Candidates[i - 1]); + Changed = true; + } + } else if (it2 != MBB->begin() && canHoistLoadStoreTo(CurrMI, &*(--it2))) { + if (CurrentInst && CurrentInst->getNextNode() == CurrMI) { + LLVM_DEBUG( + dbgs() << "Cannot move instruction - would break iterators\n"); + return false; + } + CurrMI->removeFromParent(); + MBB->insertAfter(it2, CurrMI); + std::swap(Candidates[i], Candidates[i - 1]); + Changed = true; + } + } + + LLVM_DEBUG(if (Changed) { + dbgs() << "Reordered Candidates:\n"; + for (auto &C : Candidates) { + dbgs() << Ordinals[C.first] << ": " << *C.first << "\t"; + C.second.print(dbgs(), TRI); + dbgs() << "\n"; + } + }); + + return Changed; +} + +// Change +// %r3 = LD_rs9 %r1, off +// %r2 = ADD_rru6 %r1, off +// +// to +// %r3,%r2 = LD_rs9_aw, %r1, off +// +bool ARC::ABAW::tryToGenPreIncImm() { + size_t N = Candidates.size() - 1; + InstIncrPair &I = Increments[0]; + GeneratePreInc = false; + + if (!isInt<9>(I.second.getImm())) return false; - int64_t Offset = MO.getImm() + Disp; - return isValidLoadStoreOffset(Offset); + + // TODO: run this loop backwards? We can have several eligible instrs, + // see v2_xy/instcombine/valgn_create.c + for (unsigned Pos = 0; Pos < N; ++Pos) { + InstIncrPair &C = Candidates[Pos]; + if (getConversionToAW(*C.first) == 0) + continue; + + if (C.second.getImm() == 0 || C.second.getImm() != I.second.getImm()) + continue; + + if (canJoinInstructions(C.first, I.first) != C.first) + continue; + + LLVM_DEBUG(dbgs() << "\tcan generate preincrement for " + << *Candidates[Pos].first); + GeneratePreInc = true; + for (unsigned i = Pos + 1; i < N; ++i) { + ExternalDominatedUses.push_back(Candidates[i].first); + } + SmallVector Tmp(1, C); + Candidates.swap(Tmp); + return true; + } + return false; } -bool ARCOptAddrMode::noUseOfAddBeforeLoadOrStore(const MachineInstr *Add, - const MachineInstr *Ldst) { - Register R = Add->getOperand(0).getReg(); - return dominatesAllUsesOf(Ldst, R, MDT, MRI); +bool ARC::ABAW::checkCandidatesImm() { + // Would offset deltas fit into S9 immediate? + int64_t PrevOffset = Candidates[0].second.getImm(); + MachineInstr *Incr = Increments[0].first; + + bool allVectorLDST = false; + // TODO + // if (ST.shouldFavorPostIncForVDSP()) { + // allVectorLDST = true; + // for (auto &C: Candidates) { + // if (C.first != Incr && !ARC::isVectorInstr(C.first->getDesc().TSFlags)) + // { + // allVectorLDST = false; + // break; + // } + // } + // } + + for (auto &C : Candidates) { + int64_t Offset = C.second.getImm(); + // Vector load/store supports 16-bit/32-bit LIMMs. 
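+      // For illustration (hypothetical offsets, not from a real test case): with
+      // candidate offsets {0, 4, 260} the successive deltas are {4, 256}; isInt<9>
+      // accepts only [-256, 255], so the 256 delta rejects the whole chain unless
+      // every candidate is a vector load/store (allVectorLDST, currently never
+      // set - see the TODO above) that can take the wider LIMM encoding.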
+ if (!allVectorLDST && !isInt<9>(Offset - PrevOffset)) { + // TODO: put it in register + LLVM_DEBUG(dbgs() << "\tcannot fix offset of " << *C.first); + return false; + } + if (IsOptimizeForSpace && C.first != Incr && mayHaveShortForm(*C.first) && + Offset >= 0 && (Offset - PrevOffset) < 0) { + LLVM_DEBUG(dbgs() << "Negative offset could hurt code size\n"); + return false; + } + PrevOffset = Offset; + } + return true; } -MachineInstr *ARCOptAddrMode::tryToCombine(MachineInstr &Ldst) { - assert(Ldst.mayLoadOrStore() && "LD/ST instruction expected"); +// Reorder add and sub to improve LICM and Post-inc load/store +// %vreg34 = PHI %vreg0, , %vreg41, -- Should have only two uses +// %vreg66 = LDSB_rr %vreg34, %vreg1; -- vreg1 must be loop invariant +// %vreg41 = SUB_rru6 %vreg34, 1; -- Should have only one use +// --> +// %vregxx = ADD_rrr %vreg0, %vreg1; -- Outside the loop at end of +// %vreg34 = PHI %vregxx, , %vreg41, %vreg66 = LDSB_rs9 +// %vreg34, 0; %vreg41 = SUB_rru6 %vreg34, 1; +bool ARC::ABAW::transformPhiForBetterLICM(MachineInstr *Phi, MachineInstr *Ldst, + unsigned BaseIdx, unsigned OffIdx) { + if (!ExternalDominatingUses.empty()) + return false; // TODO: possible to fix PHI use by updating BaseIdx in loop + // exit block + + LLVM_DEBUG(dbgs() << "[ABAW] Ready to transform with\nphi = " << *Phi + << " and Ldst = " << *Ldst); + MachineOperand &Offset = Ldst->getOperand(OffIdx); + MachineBasicBlock *MBB = Phi->getOperand(2).getMBB(); + unsigned NewReg = createVirtReg(Ldst->getOperand(BaseIdx).getReg()); + unsigned InitReg = Phi->getOperand(1).getReg(); + MachineInstr *NewAdd = nullptr; + + if (Offset.isReg()) { + unsigned Opc = ARC::getConversionToRS9(Ldst->getOpcode()); + assert(Opc != 0 && "No RS9 form for instruction"); + Ldst->setDesc(ST.getInstrInfo()->get(Opc)); + // TODO unsigned ADD = ST.isArc64() ? ARC::ADDL_rrr : ARC::ADD_rrr; + unsigned ADD = ST.isArc64() ? ARC::ADD_rrr : ARC::ADD_rrr; + NewAdd = BuildMI(*MBB, MBB->getFirstInstrTerminator(), Ldst->getDebugLoc(), + ST.getInstrInfo()->get(ADD), NewReg) + .addReg(InitReg) + .addReg(Offset.getReg()); + Offset.ChangeToImmediate(0); - unsigned BasePos, OffsetPos; + } else { + int64_t OffsetImm = Offset.getImm(); + // Try simple constant folding + int64_t amount; + MachineInstr *InitRegDef = getSsaDef(InitReg); + if (ARC::isAddConstantOp(*InitRegDef, &amount)) { + InitReg = ARC::getVReg(InitRegDef->getOperand(1)); + OffsetImm += amount; + } - LLVM_DEBUG(dbgs() << "[ABAW] tryToCombine " << Ldst); - if (!AII->getBaseAndOffsetPosition(Ldst, BasePos, OffsetPos)) { - LLVM_DEBUG(dbgs() << "[ABAW] Not a recognized load/store\n"); - return nullptr; + NewAdd = ARC::buildAddByConstant(MRI, *MBB, MBB->getFirstInstrTerminator(), + Ldst->getDebugLoc(), NewReg, InitReg, + OffsetImm); + + OffsetImm = + Offset.getImm(); // We might have spoiled OffsetImm above, get it again + MachineInstr *Incr = getSsaDef(Phi->getOperand(3)); + for (MachineInstr &MI : + MRI.use_nodbg_instructions(Ldst->getOperand(BaseIdx).getReg())) { + if (&MI == Incr) + continue; + // NOTE: It is caller's responsibility to ensure that finally all + // instructions can handle new displacement - in the middle of + // transformation chain this condition temporarily can break + MachineOperand &O = MI.getOperand(MI.mayLoad() ? 
2 : 1); + O.setImm(O.getImm() - OffsetImm); + } } - MachineOperand &Base = Ldst.getOperand(BasePos); - MachineOperand &Offset = Ldst.getOperand(OffsetPos); + // Update PHI's incoming value + Phi->getOperand(1).setReg(NewReg); - assert(Base.isReg() && "Base operand must be register"); - if (!Offset.isImm()) { - LLVM_DEBUG(dbgs() << "[ABAW] Offset is not immediate\n"); - return nullptr; - } + LLVM_DEBUG(dbgs() << "[ABAW] Transformed with\nphi = " << *Phi + << " and NewAdd = " << *NewAdd << " and Ldst = " << *Ldst + << "\n"); + return true; +} - Register B = Base.getReg(); - if (Register::isStackSlot(B) || !Register::isVirtualRegister(B)) { - LLVM_DEBUG(dbgs() << "[ABAW] Base is not VReg\n"); - return nullptr; +static unsigned isAddRRR(unsigned Opcode) { + switch (Opcode) { + // TODO + // case ARC::ADDL_rrr: + case ARC::ADD_rrr: + return 1; + // case ARC::ADD1L_rrr: + case ARC::ADD1_rrr: + return 2; + // case ARC::ADD2L_rrr: + case ARC::ADD2_rrr: + return 4; + // case ARC::ADD3L_rrr: + case ARC::ADD3_rrr: + return 8; } + return 0; +} +static unsigned isAddRRR(MachineInstr &MI) { return isAddRRR(MI.getOpcode()); } - // TODO: try to generate address preincrement - if (Offset.getImm() != 0) { - LLVM_DEBUG(dbgs() << "[ABAW] Non-zero offset\n"); - return nullptr; +unsigned ARC::ABAW::getConversionToAB(MachineInstr &MI) const { + if (ARC::isVectorInstr(MI.getDesc().TSFlags)) { + // TODO + // if (!ST.shouldFavorPostIncForVDSP()) + return 0; } + return ARC::getConversionToAB(MI.getOpcode()); +} - for (auto &Add : MRI->use_nodbg_instructions(B)) { - int64_t Incr; - if (!isAddConstantOp(Add, Incr)) - continue; - if (!isValidLoadStoreOffset(Incr)) - continue; - - SmallVector Uses; - MachineInstr *MoveTo = canJoinInstructions(&Ldst, &Add, &Uses); +unsigned ARC::ABAW::getVDSPAVVariant(unsigned ABOpcode) const { + return 0; // TODO upstream VDSP +} - if (!MoveTo) - continue; +unsigned ARC::ABAW::getConversionToAW(MachineInstr &MI) const { + if (ARC::isVectorInstr(MI.getDesc().TSFlags)) { + // TODO + // if (!ST.shouldFavorPostIncForVDSP()) + return 0; + } + return ARC::getConversionToAW(MI.getOpcode()); +} - if (!canFixPastUses(Uses, Add.getOperand(2), B)) - continue; +void ARC::ABAW::preBlockCallout(MachineBasicBlock &MBB) { + computeOrdinals(MBB); +} - LLVM_DEBUG(MachineInstr *First = &Ldst; MachineInstr *Last = &Add; - if (MDT->dominates(Last, First)) std::swap(First, Last); - dbgs() << "[ABAW] Instructions " << *First << " and " << *Last - << " combined\n"; +void ARC::ABAW::postBlockCallout(MachineBasicBlock &MBB, bool result) { + CurrentInst = nullptr; + clear(); +} - ); +bool ARC::ABAW::visit(MachineInstr &MI) { - MachineInstr *Result = Ldst.getNextNode(); - if (MoveTo == &Add) { - Ldst.removeFromParent(); - Add.getParent()->insertAfter(Add.getIterator(), &Ldst); - } - if (Result == &Add) - Result = Result->getNextNode(); + MachineBasicBlock *MBB = MI.getParent(); + if (!MBB) + return false; + if (MI.isDebugValue()) + return false; - fixPastUses(Uses, B, Incr); + // Identify inner, single block loops as SWP candidates and + // skip under vdsp-inner-post-inc=false. 
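+  // A loop consisting of one basic block with no subloops is an innermost,
+  // single-block loop; "SWP" here presumably refers to software pipelining.
+  // InSWPCandidate is only consulted by the VDSP gating just below, which is
+  // still hard-coded to false pending upstreaming.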
+ if (const MachineLoop *ML = MLI.getLoopFor(MBB)) + if (ML->getNumBlocks() == 1 && ML->getSubLoops().size() == 0) + InSWPCandidate = true; - int NewOpcode = ARC::getPostIncOpcode(Ldst.getOpcode()); - assert(NewOpcode > 0 && "No postincrement form found"); - unsigned NewBaseReg = Add.getOperand(0).getReg(); - changeToAddrMode(Ldst, NewOpcode, NewBaseReg, Add.getOperand(2)); - Add.eraseFromParent(); + if (ARC::isVectorInstr(MI.getDesc().TSFlags) && + false /*TODO (!ST.shouldFavorPostIncForVDSP() || (InSWPCandidate && VDSPInnerLoopMode == InnerLoopMode::DISABLED))*/ ) + return false; - return Result; + LLVM_DEBUG(dbgs() << "[PREREGCOMBINE]>>>Visiting " << MI << "\n"); + CurrentInst = &MI; + if (getConversionToAB(MI) != 0) + return tryToCombine(MI); + + bool FoundSomething = false; + unsigned Reg = 0; + if (isAddConstantOp(MI, nullptr) || isAddRRR(MI.getOpcode())) { + Reg = ARC::getVReg(MI.getOperand(1)); + if (Reg && !Register::isStackSlot(Reg)) + FoundSomething = collectCandidates(Reg, MBB); + // TODO + if (!FoundSomething && + (MI.getOpcode() == + ARC::ADD_rrr /*|| MI.getOpcode() == ARC::ADDL_rrr*/)) { + Reg = ARC::getVReg(MI.getOperand(2)); + if (Reg && !Register::isStackSlot(Reg)) + FoundSomething = collectCandidates(Reg, MBB); + } } - return nullptr; + if (FoundSomething && analyzeCandidates(Reg, MBB) && + validateIncrementForms()) { + bool transformed = transformCandidates(Reg, MBB); + assert(transformed && "Failed to transform load/store candidates"); + return true; + } + return false; } -MachineInstr * -ARCOptAddrMode::canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add, - SmallVectorImpl *Uses) { - assert(Ldst && Add && "NULL instruction passed"); - - MachineInstr *First = Add; - MachineInstr *Last = Ldst; - if (MDT->dominates(Ldst, Add)) - std::swap(First, Last); - else if (!MDT->dominates(Add, Ldst)) - return nullptr; +// Check if either one instruction can be moved (up or down) to another. +// Returns instruction which another can be moved to (i.e., one not moved) or +// nullptr if nothing can be moved ASSUMPTIONS: +// - ldst uses one of add's operands as a base +// - ldst and add are in the same BB +MachineInstr *ARC::ABAW::canJoinInstructions(MachineInstr *ldst, + MachineInstr *add) { + assert(ldst && add && "NULL instruction passed"); - LLVM_DEBUG(dbgs() << "canJoinInstructions: " << *First << *Last); + bool isLoad = ldst->mayLoad(); - unsigned BasePos, OffPos; + unsigned B = ldst->getOperand(isLoad ? 
1 : 0).getReg(); - if (!AII->getBaseAndOffsetPosition(*Ldst, BasePos, OffPos)) { - LLVM_DEBUG( - dbgs() - << "[canJoinInstructions] Cannot determine base/offset position\n"); + MachineInstr *First = add; + MachineInstr *Last = ldst; + if (DOM.dominates(ldst, add)) { + std::swap(First, Last); + } else if (!DOM.dominates(add, ldst)) { return nullptr; } - Register BaseReg = Ldst->getOperand(BasePos).getReg(); + LLVM_DEBUG(dbgs() << "canJoinInstructions " << *First << *Last); // prohibit this: // v1 = add v0, c - // st v1, [v0, 0] - // and this - // st v0, [v0, 0] - // v1 = add v0, c - if (Ldst->mayStore() && Ldst->getOperand(0).isReg()) { - Register StReg = Ldst->getOperand(0).getReg(); - if (Add->getOperand(0).getReg() == StReg || BaseReg == StReg) { - LLVM_DEBUG(dbgs() << "[canJoinInstructions] Store uses result of Add\n"); - return nullptr; - } + // st [v0, 0], v1 + if (ldst->mayStore() && ldst->getOperand(2).isReg() && + add->getOperand(0).getReg() == ldst->getOperand(2).getReg()) { + LLVM_DEBUG(dbgs() << "\tStore uses result of Add\n"); + return nullptr; } - SmallVector UsesAfterLdst; - SmallVector UsesAfterAdd; - for (MachineInstr &MI : MRI->use_nodbg_instructions(BaseReg)) { - if (&MI == Ldst || &MI == Add) + // In any case, def of B (old base) must dominate all its uses (which means + // B is not used in any PHI node) + // Also, see if there are any uses of B after either instruction (ldst and + // add) + SmallVector usesAfterLdst; + SmallVector usesAfterAdd; + for (MachineOperand &o : MRI.use_nodbg_operands(B)) { + MachineInstr *MI = o.getParent(); + if (MI == First || MI == Last) continue; - if (&MI != Add && MDT->dominates(Ldst, &MI)) - UsesAfterLdst.push_back(&MI); - else if (!MDT->dominates(&MI, Ldst)) + if (MI != add && DOM.dominates(ldst, MI)) + usesAfterLdst.push_back(MI); + else if (!DOM.dominates(MI, ldst)) return nullptr; - if (MDT->dominates(Add, &MI)) - UsesAfterAdd.push_back(&MI); + if (DOM.dominates(add, MI)) + usesAfterAdd.push_back(MI); } MachineInstr *Result = nullptr; - if (First == Add) { + if (First == add) { // n = add b, i // ... // x = ld [b, o] or x = ld [n, o] + // Case 1: can we move add down to ldst? + // Conditions: + // - No uses of 'n' before ldst + // - b is not used in any PHI (already checked above) + LLVM_DEBUG(dbgs() << "\tCan move add down?..."); if (noUseOfAddBeforeLoadOrStore(First, Last)) { Result = Last; - LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can sink Add down to Ldst\n"); - } else if (canHoistLoadStoreTo(Ldst, Add)) { - Result = First; - LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can hoist Ldst to Add\n"); + LLVM_DEBUG(dbgs() << "YES\n"); + } else { + Result = nullptr; + // If both instructions are within same BB check if we can sink add down + // to its first use and them hoist load/store there. + // E.g. + // %vreg134 = ADD_rru6_ %vreg39, -4 + // CMP_rr %vreg39, %vreg0, %STATUS + // %vreg46 = MOVcc_rr %vreg134, %vreg2, pred:1, + // %STATUS + // .... 
+ // %vreg137 = LD_rs9 %vreg39, 0 + // It's possible to sink ADD past CMP and then hoist LD to ADD + if (ldst->getParent() == add->getParent()) { + MachineBasicBlock::iterator f(add), l(ldst); + unsigned R = add->getOperand(0).getReg(); + for (; f != l; ++f) { + if (f->readsVirtualRegister(R)) + break; + } + assert(f != l && "Use of add not found"); + MachineInstr *PredMI = &*std::prev(f); + if (PredMI != add && canHoistLoadStoreTo(ldst, PredMI)) { + LLVM_DEBUG(dbgs() + << "\tFound intermediate instruction to sink add to: " + << *PredMI); + add->removeFromParent(); + MachineBasicBlock *MBB = PredMI->getParent(); + // Move add right before first use + MBB->insert(f, add); + Result = add; + // Adjust array of past uses + SmallVector tmp; + for (MachineInstr *I : usesAfterAdd) { + if (DOM.dominates(add, I)) + tmp.push_back(I); + } + usesAfterAdd.swap(tmp); + } + } + LLVM_DEBUG(if (Result == nullptr) dbgs() + << "NO (add result is used in between)\n";); + } + + if (Result == nullptr) { + // Case 2: can we move ldst up to add? (longer live range) + LLVM_DEBUG(dbgs() << "\tCan move load/store up?..."); + if (!canHoistLoadStoreTo(ldst, add)) { + Result = nullptr; + LLVM_DEBUG(dbgs() << "NO (memory operation in between)\n"); + } else { + Result = First; + LLVM_DEBUG(dbgs() << "YES\n"); + } } } else { // x = ld [b, o] // ... // n = add b, i - Result = First; - LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can hoist Add to Ldst\n"); + + if (SINK_LDST()) { + // Case 3: can we move ldst down to add? + LLVM_DEBUG(dbgs() << "\tCan move ldst down?..."); + if (!canSinkLoadStoreTo(ldst, add)) { + Result = nullptr; + } else { + Result = Last; + } + LLVM_DEBUG(dbgs() << (Result ? "YES\n" : "NO\n")); + } + + if (Result == nullptr) { + LLVM_DEBUG(dbgs() << "\tMoving add up\n"); + Result = ldst; + } + + // Final check: + // if add's second operand (i) is register, its def must _strictly_ dominate + // ld (valid for loads only) + unsigned A = ARC::getVReg(add->getOperand(2)); + if (A) { + if (!isLoad) { + LLVM_DEBUG(dbgs() << " NO (store cannot handle non-literal offset\n"); + return nullptr; + } + if (A == B) + A = ARC::getVReg(add->getOperand(1)); + MachineInstr *Def = getSsaDef(A); + if (Def == ldst || Def == nullptr || !DOM.dominates(Def, ldst)) { + LLVM_DEBUG(dbgs() << " NO (offset def does not dominate load\n"); + return nullptr; + } + } } - if (Result && Uses) - *Uses = (Result == Ldst) ? UsesAfterLdst : UsesAfterAdd; + + // Now check that we can update all uses of b after Result (if any) to use new + // value + InstrVector &instrs = (Result == ldst) ? 
usesAfterLdst : usesAfterAdd; + if (!instrs.empty()) { + if (!canFixPastUses(instrs, add, B)) + Result = nullptr; + } + + LLVM_DEBUG(if (Result != nullptr) dbgs() + << "canJoinInstructions: MoveTo: " << *Result; + else dbgs() << "Can not join instructions\n"); + return Result; } -bool ARCOptAddrMode::canFixPastUses(const ArrayRef &Uses, - MachineOperand &Incr, unsigned BaseReg) { +// Check that result of 'add' is not used before 'ldst' +bool ARC::ABAW::noUseOfAddBeforeLoadOrStore(MachineInstr *add, + MachineInstr *ldst) { + unsigned R = add->getOperand(0).getReg(); + return ARC::dominatesAllUsesOf(ldst, R, &DOM, &MRI); +} - assert(Incr.isImm() && "Expected immediate increment"); - int64_t NewOffset = Incr.getImm(); - for (MachineInstr *MI : Uses) { - int64_t Dummy; - if (isAddConstantOp(*MI, Dummy)) { - if (isValidIncrementOffset(Dummy + NewOffset)) - continue; - return false; - } - if (isLoadStoreThatCanHandleDisplacement(AII, *MI, -NewOffset)) - continue; - LLVM_DEBUG(dbgs() << "Instruction cannot handle displacement " << -NewOffset - << ": " << *MI); +void ARC::ABAW::setLoadStoreBaseOffset(MachineInstr &MI, unsigned NewBaseReg, + unsigned BaseReg, + const MachineOperand &NewOffset) { + MachineOperand Src = MachineOperand::CreateImm(0xDEADBEEF); + unsigned BaseIndex = 0; + bool IsStore = MI.mayStore(); + if (IsStore) { + // Store: NewBaseReg = ST.ab [BaseReg, NewOffset], Src + Src = MI.getOperand(2); + MI.RemoveOperand(2); + assert(NewOffset.isImm() && "Store can only handle immediate offsets"); + } else if (MI.getOperand(0).isReg() && MI.getOperand(0).isDef()) { + // Load: X, NewBaseReg = LD.ab [BaseReg, NewOffset] + BaseIndex = 1; + } else { + // Prefetch: NewBaseReg = PF.ab [BaseReg, NewOffset] + BaseIndex = 0; + } + MI.RemoveOperand(BaseIndex + 1); + MI.RemoveOperand(BaseIndex); + MI.addOperand(MachineOperand::CreateReg(NewBaseReg, true)); + MI.addOperand(MachineOperand::CreateReg(BaseReg, false)); + MI.addOperand(NewOffset); + if (IsStore) + MI.addOperand(Src); + if (!MI.getOperand(BaseIndex).isTied()) + MI.tieOperands(BaseIndex, BaseIndex + 1); +} + +void ARC::ABAW::setLoadStoreBaseOffset(MachineInstr &MI, unsigned NewBaseReg, + unsigned BaseReg, int64_t NewOffset) { + const MachineOperand Opnd = MachineOperand::CreateImm(NewOffset); + return setLoadStoreBaseOffset(MI, NewBaseReg, BaseReg, Opnd); +} + +static bool updateRegIncrement(MachineInstr &Def, ARC::RegIncrement &RI) { + unsigned NewReg = 0; + switch (Def.getOpcode()) { + // TODO upstream + // case ARC::MPYL_rru6: + case ARC::MPY_rru6: + NewReg = Def.getOperand(1).getReg(); + RI.Scale *= Def.getOperand(2).getImm(); + break; + // case ARC::ASLL_rru6: + case ARC::ASL_rru6: + NewReg = Def.getOperand(1).getReg(); + RI.Scale *= 1 << Def.getOperand(2).getImm(); + break; + // case ARC::ASL1_rr: + // NewReg = Def.getOperand(1).getReg(); + // RI.Scale *= 2; + // break; + default: return false; } + assert(Def.getOperand(0).getReg() == RI.Reg && "Wrong DEF Register"); + RI.Reg = NewReg; return true; } -void ARCOptAddrMode::fixPastUses(ArrayRef Uses, - unsigned NewBase, int64_t NewOffset) { +// Is "MI" the result of adding a constant or register to itself? 
+// If so, make the first source operand reference the PHI +bool ARC::ABAW::isSelfIncrementing(MachineInstr &MI) const { + switch (MI.getOpcode()) { + // TODO upstream + // case ARC::ADDL_rrr: + case ARC::ADD_rrr: { + const MachineInstr *B = getSsaDef(MI.getOperand(2)); + if (B && B->isPHI() && B->readsRegister(MI.getOperand(0).getReg())) { + // Swap operands so that the first source operand is the PHI + const MachineOperand Opd = MI.getOperand(1); + MI.RemoveOperand(1); + MI.addOperand(Opd); + return true; + } + } + LLVM_FALLTHROUGH; + case ARC::ADD_rru6: + case ARC::ADD_rrlimm: + // case ARC::ADDL_rru6: + // case ARC::ADDL_rrlimm: + // case ARC::SUBL_rru6: + case ARC::SUB_rru6: { + const MachineInstr *A = getSsaDef(MI.getOperand(1)); + if (A && A->isPHI() && A->readsRegister(MI.getOperand(0).getReg())) + return true; + break; + } + default: + break; + } + return false; +} - for (MachineInstr *MI : Uses) { - int64_t Amount; - unsigned BasePos, OffPos; - if (isAddConstantOp(*MI, Amount)) { - NewOffset += Amount; - assert(isValidIncrementOffset(NewOffset) && - "New offset won't fit into ADD instr"); - BasePos = 1; - OffPos = 2; - } else if (AII->getBaseAndOffsetPosition(*MI, BasePos, OffPos)) { - MachineOperand &MO = MI->getOperand(OffPos); - assert(MO.isImm() && "expected immediate operand"); - NewOffset += MO.getImm(); - assert(isValidLoadStoreOffset(NewOffset) && - "New offset won't fit into LD/ST"); - } else - llvm_unreachable("unexpected instruction"); - - MI->getOperand(BasePos).setReg(NewBase); - MI->getOperand(OffPos).setImm(NewOffset); - } -} - -bool ARCOptAddrMode::canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) { +// This function returns 1 for ADDs and -1 for SUBs and 0 for anything else +static int64_t findScale(MachineInstr *MI) { + int64_t Value = 0; + if (ARC::isAddConstantOp(*MI, &Value)) + return (Value >= 0) ? 1 : -1; + return 0; +} + +// TODO upsteam scaled indexed addressing +static unsigned getLoadScale(unsigned opcode, const ARCSubtarget &ST) { + return 0; +} + +// Try to find increment instruction we can combine Ldst with +// TODO: +// - port to Candidates API +// - base and offset registers are sometimes swapped - offset comes first +bool ARC::ABAW::tryToCombine(MachineInstr &Ldst) { + bool isLoad = Ldst.mayLoad(); + unsigned baseIndex = isLoad ? 1 : 0; + MachineOperand &base = Ldst.getOperand(baseIndex); + MachineOperand &offset = Ldst.getOperand(baseIndex + 1); + int64_t scale = 1; + unsigned B = ARC::getVReg(base); + if (!B || Register::isStackSlot(B)) + return false; + + if (!offset.isImm()) + return false; + + LLVM_DEBUG(dbgs() << "[PREREGCOMBINE] Looking at ldst instruction: " << Ldst); + + MachineInstr *BaseDef = getSsaDef(B); + if (BaseDef != nullptr && BaseDef->getParent() == Ldst.getParent()) { + unsigned newOp = 0; + switch (BaseDef->getOpcode()) { + // TODO: SUB_rru6, etc. + case ARC::ADD_rru6: + // case ARC::ADDL_rru6: + if (int32_t(offset.getImm()) == + -int32_t(BaseDef->getOperand(2).getImm())) { + // + // Handle: + // + // %2 = PHI ... %1 ... + // ... + // %1 = ADD_rru6 %2, 4 + // ... + // %4 = LD_rs9 %1, -4 + // + // Convert LD to: %4, %1 = LD_rs9_ab %2, 4 + // and delete ADD_rru6 + if (Ldst.getNumOperands() == 3 && + !ARC::isUsedBetween(B, BaseDef, &Ldst)) { + newOp = ARC::getConversionToAB(Ldst.getOpcode()); + if (newOp && isSelfIncrementing(*BaseDef)) + break; + newOp = 0; + } + } + LLVM_FALLTHROUGH; + // case ARC::ADDL_rrr: + case ARC::ADD_rrr: { + // Look for: + // + // %2 = PHI ... %1 ... + // ... + // %1 = ADD_rrr %2, %3 + // ... 
+ // %4 = LD_rs9 %1, 0 + // + // Convert LD to: %4, %1 = LD_rr_aw %2, %3 + // and delete ADD_rrr + LLVM_DEBUG(dbgs() << "[PREREGCOMBINE] base is: " << *BaseDef); + if (!(Ldst.mayStore() && BaseDef->getOperand(2).isReg()) && + offset.getImm() == 0 && Ldst.getNumOperands() == 3 && + !ARC::isUsedBetween(B, BaseDef, &Ldst)) { + newOp = BaseDef->getOperand(2).isImm() + ? ARC::getConversionToAW(Ldst.getOpcode()) + : ARC::getConversionToRRAW(Ldst.getOpcode()); + } + if (newOp && isSelfIncrementing(*BaseDef)) + break; + LLVM_DEBUG(if (newOp == 0) dbgs() << "[precombine] No _rr_aw form\n"; + else dbgs() << "[precombine] not self-referential\n"); + newOp = 0; + break; + } + default: + break; + } + if (newOp != 0) { + LLVM_DEBUG(dbgs() << "[precombine] Transforming " << Ldst); + Ldst.setDesc(ST.getInstrInfo()->get(newOp)); + if (!Ldst.getOperand(0).isDef()) { + // a store + // base2 = ST_rs9_aw base1, offset, src + MachineOperand src = Ldst.getOperand(2); + Ldst.RemoveOperand(2); + Ldst.RemoveOperand(1); + Ldst.RemoveOperand(0); + Ldst.addOperand(BaseDef->getOperand(0)); + Ldst.addOperand(BaseDef->getOperand(1)); + Ldst.addOperand(BaseDef->getOperand(2)); + Ldst.addOperand(src); + } else { + // a load + // dest, base2 = ld_rs9_aw base1, offset + Ldst.RemoveOperand(2); + Ldst.RemoveOperand(1); + Ldst.addOperand(BaseDef->getOperand(0)); + Ldst.addOperand(BaseDef->getOperand(1)); + Ldst.addOperand(BaseDef->getOperand(2)); + } + LLVM_DEBUG(dbgs() << "[precombine] to: " << Ldst + << "[precombine] Deleting " << *BaseDef); + BaseDef->eraseFromParent(); + return true; + } + } + if (offset.getImm() != 0) + return false; + for (auto UI = MRI.use_nodbg_begin(B), UE = MRI.use_nodbg_end(); UI != UE; + ++UI) { + bool isImmedOffset = true; + int64_t newOffset = 0; + MachineInstr *add = UI->getParent(); + // XXX: It's important to call findScale, not isAddConstantOp here to skip + // previously generated PSEUDO_ADD + // FIXME: need a better solution + scale = findScale(add); + if (!scale) { + isImmedOffset = false; + // TODO + if (add->getOpcode() != + ARC::ADD_rrr /*&& add->getOpcode() != ARC::ADDL_rrr*/) + continue; + if (Ldst.mayStore()) + continue; + } else { + newOffset = scale * add->getOperand(2).getImm(); + if (!isInt<9>(newOffset)) + continue; + } + + MachineInstr *MoveTo = canJoinInstructions(&Ldst, add); + if (MoveTo == nullptr) + continue; + + unsigned NewOpcode = isImmedOffset + ? getConversionToAB(Ldst) + : ARC::getConversionToRRAB(Ldst.getOpcode()); + if (NewOpcode == 0) { + LLVM_DEBUG(dbgs() << "[PREREGCOMBINE] No postincrement form found: " + << Ldst << "\n"); + return false; + } + + MachineInstr *First = &Ldst; + MachineInstr *Last = add; + if (DOM.dominates(Last, First)) { + std::swap(First, Last); + } + + LLVM_DEBUG(dbgs() << "[PREREGCOMBINE] Instructions " << *First << " and " + << *Last << " combined\n"); + + // TODO upstream GEN_PSEUDO_INC() + + if (MoveTo == add) { + Ldst.removeFromParent(); + add->getParent()->insertAfter(add->getIterator(), &Ldst); + } + toBeDeleted(add); + + // Create new vreg instead of reusing add's destination to maintain SSA form + // (as we do not immediately delete add) + // Needs to be same register class as result of add. 
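+      // Rough shape of the register-offset rewrite (the immediate-offset case is
+      // analogous), with %base2 standing for the fresh vreg created below:
+      //   %v = LD_rs9 %base, 0        -->   %v, %base2 = LD_rr_ab %base, %incr
+      //   %n = ADD_rrr %base, %incr         (uses of %n become %base2; the ADD,
+      //                                      now dead, was queued for deletion above)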
+ unsigned NewBaseReg = createVirtReg(add->getOperand(0).getReg()); + Ldst.setDesc(ST.getInstrInfo()->get(NewOpcode)); + + if (isImmedOffset) { + fixIntermediates(&Ldst, add->getOperand(0).getReg(), newOffset, B); + setLoadStoreBaseOffset(Ldst, NewBaseReg, B, newOffset); + } else { + assert(!Ldst.mayStore() && + "Unexpected Store when combining with ADD_rrr"); + unsigned BaseIdx = 1, OffIdx = 2; + if (ARC::getVReg(add->getOperand(OffIdx)) == B) + std::swap(BaseIdx, OffIdx); + + assert(ARC::getVReg(add->getOperand(BaseIdx)) == B && + "Base register mismatch"); + MachineOperand &Incr = add->getOperand(OffIdx); + fixIntermediatesReg(&Ldst, add->getOperand(0).getReg(), Incr.getReg(), 1, + B); + setLoadStoreBaseOffset(Ldst, NewBaseReg, B, Incr); + tiedRegisterMap[B] = {NewBaseReg, Incr.getReg(), &Ldst}; + } + + ARC::replaceAllUsesWith(MRI, add->getOperand(0).getReg(), NewBaseReg); + + LLVM_DEBUG(dbgs() << "[PREREGCOMBINE] to form " << Ldst << "\n"); + return true; + } + return false; +} + +// Check if load/store instruction 'Ldst' can be hoisted up to instruction 'To' +// Conditions: +// - both instructions are in the same BB +// - there are no instructions with unknown side effects in between +// - there are no stores (if Ldst is a load) or loads and stores (if Ldst is a +// store) +// in between +// - If Ldst is a store, DEF of its value operand must dominate instruction To +// - If Ldst is a load with register offset, DEF of offset must dominate To +bool ARC::ABAW::canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) { if (Ldst->getParent() != To->getParent()) return false; MachineBasicBlock::const_iterator MI(To), ME(Ldst), @@ -417,111 +1353,1029 @@ for (auto &O : Ldst->explicit_operands()) { if (!O.isReg() || !O.isUse()) continue; - MachineInstr *OpDef = MRI->getVRegDef(O.getReg()); - if (!OpDef || !MDT->dominates(OpDef, To)) + MachineInstr *OpDef = getSsaDef(O); + if (!OpDef || !DOM.dominates(OpDef, To)) return false; } return true; } -// bool ARCOptAddrMode::canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) { -// // Can only sink load/store within same BB -// if (Ldst->getParent() != To->getParent()) -// return false; -// MachineBasicBlock::const_iterator MI(Ldst), ME(To), -// End(Ldst->getParent()->end()); - -// bool IsStore = Ldst->mayStore(); -// bool IsLoad = Ldst->mayLoad(); - -// Register ValReg = IsLoad ? 
Ldst->getOperand(0).getReg() : Register(); -// for (; MI != ME && MI != End; ++MI) { -// if (MI->isDebugValue()) -// continue; -// if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() || -// MI->hasUnmodeledSideEffects()) -// return false; -// if (IsStore && MI->mayLoad()) -// return false; -// if (ValReg && MI->readsVirtualRegister(ValReg)) -// return false; -// } -// return true; -// } - -void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode, - unsigned NewBase, - MachineOperand &NewOffset) { - bool IsStore = Ldst.mayStore(); - unsigned BasePos, OffPos; - MachineOperand Src = MachineOperand::CreateImm(0xDEADBEEF); - AII->getBaseAndOffsetPosition(Ldst, BasePos, OffPos); +// Check if load/store instruction 'Ldst' can be sunk down to instruction 'To' +// Conditions: +// - both instructions are in the same BB +// - there are no instructions with unknown side effects in between +// - there are no stores (if Ldst is a load) or loads and stores (if Ldst is a +// store) +// in between +// - If Ldst is a load, its result must not be used before 'To' +bool ARC::ABAW::canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) { + if (Ldst->getParent() != To->getParent()) + return false; + MachineBasicBlock::const_iterator MI(Ldst), ME(To), + End(Ldst->getParent()->end()); - Register BaseReg = Ldst.getOperand(BasePos).getReg(); + // PREFETCH is neither one + bool IsStore = Ldst->mayStore(); + bool IsLoad = Ldst->mayLoad(); - Ldst.RemoveOperand(OffPos); - Ldst.RemoveOperand(BasePos); + if (IsLoad) { + unsigned ValReg = Ldst->getOperand(0).getReg(); + if (ARC::isUsedBetween(ValReg, MI, ME, /*isExclusiveMI*/ false)) + return false; + } - if (IsStore) { - Src = Ldst.getOperand(BasePos - 1); - Ldst.RemoveOperand(BasePos - 1); + for (; MI != ME && MI != End; ++MI) { + if (MI->isDebugValue()) + continue; + if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() || + MI->hasUnmodeledSideEffects()) + return false; + if (IsStore && MI->mayLoad()) + return false; } - Ldst.setDesc(AST->getInstrInfo()->get(NewOpcode)); - Ldst.addOperand(MachineOperand::CreateReg(NewBase, true)); - if (IsStore) - Ldst.addOperand(Src); - Ldst.addOperand(MachineOperand::CreateReg(BaseReg, false)); - Ldst.addOperand(NewOffset); - LLVM_DEBUG(dbgs() << "[ABAW] New Ldst: " << Ldst); + return true; } -bool ARCOptAddrMode::processBasicBlock(MachineBasicBlock &MBB) { - bool Changed = false; - for (auto MI = MBB.begin(), ME = MBB.end(); MI != ME; ++MI) { - if (MI->isDebugValue()) +// Check if we can adjust all instructions (after ldst) using BaseReg to handle +// new base value (as incremented by Incr) +bool ARC::ABAW::canFixPastUses(const SmallVectorImpl &Uses, + MachineInstr *Incr, unsigned BaseReg) { + + MachineOperand &O = Incr->getOperand(2); + if (O.isImm()) { + int64_t NewOffset = O.getImm(); + for (MachineInstr *MI : Uses) { + if (isAddConstantOp(*MI, nullptr)) + continue; + if (!ARC::isLoadStoreThatCanHandleDisplacement(*MI, -NewOffset)) { + LLVM_DEBUG(dbgs() << "\tInstruction cannot handle displacement " + << -NewOffset << ": " << *MI); + return false; + ; + } + + if (IsOptimizeForSpace) { + // unsigned Opcode = MI->getOpcode(); + if (mayHaveShortForm(*MI) /*TODO && Opcode != ARC::PREFETCH_rs9*/) { + // In ideal case, all instructions can be transformed to short + // (2 byte forms). But load/stores with postincrement or negative + // offsets have no short forms, so they're 4 bytes. 
+ // Merging increment into load/store does not give a win for code size + // (2+2 == 4+0), so any change to negative offset will grow size. + unsigned offsetIdx = MI->mayLoad() ? 2 : 1; + int64_t Offset = MI->getOperand(offsetIdx).getImm(); + if (Offset >= 0 && (Offset - NewOffset) < 0) { + LLVM_DEBUG(dbgs() << "\tNegative offset may hurt code size\n"); + return false; + } + } + } + } + return true; + } + + // Offset is register + // Operands of ADD must be Incr's base register and addend register: + // %vreg10 = ADD_rrr %vreg9, %vreg2 + // LD/ST [%vreg9, %vreg2] + // %vreg11 = ADD2_rrr %vreg9, %vreg2 <- We're seeking to update this + unsigned AddendReg = O.getReg(); + if (AddendReg == BaseReg) + AddendReg = Incr->getOperand(1).getReg(); + for (MachineInstr *MI : Uses) { + switch (MI->getOpcode()) { + // TODO upstream + // case ARC::ADD1L_rrr: + // case ARC::ADD2L_rrr: + // case ARC::ADD3L_rrr: + case ARC::ADD1_rrr: + case ARC::ADD2_rrr: + case ARC::ADD3_rrr: + if (ST.isDSPv1() || !ST.hasMpy()) { + LLVM_DEBUG(dbgs() << "\tTarget has no MPY insn\n"); + return false; + } + LLVM_FALLTHROUGH; + // case ARC::ADDL_rrr: + case ARC::ADD_rrr: { + unsigned R = ARC::getVReg(MI->getOperand(2)); + if (!R) + break; + if (ARC::getVReg(MI->getOperand(1)) == BaseReg && R == AddendReg) + continue; + // TODO + if (MI->getOpcode() == + ARC::ADD_rrr /*|| MI->getOpcode() == ARC::ADDL_rrr*/) { + // Previous transformation could have changed reg1 = ADD2 reg2, reg3 + // to reg4 = MPY reg3, 3; reg1 = ADD reg2, reg4 + MachineInstr *AddendDef = getSsaDef(R); + if (!AddendDef || (AddendDef->getOpcode() != ARC::MPY_rru6 /* TODO: && AddendDef->getOpcode() != ARC::MPYL_rru6*/)) + break; + if (ARC::getVReg(AddendDef->getOperand(1)) == AddendReg) + continue; + } + } break; + default: + if (ARC::getConversionToRS9(MI->getOpcode())) { + unsigned R = ARC::getVReg(MI->getOperand(MI->mayLoad() ? 2 : 1)); + if (R == AddendReg) + continue; + } + } + LLVM_DEBUG( + dbgs() + << "\tOffset is register and there are uses of base after ld/add\n"); + return false; + } + return true; +} + +// Adjust instructions past ldst using 'Base' to accomodate register addendum +// 'Addend' +void ARC::ABAW::fixIntermediatesReg(MachineInstr *ldst, unsigned newBase, + unsigned Addend, unsigned Scale, + unsigned Base) { + LLVM_DEBUG(dbgs() << "fixIntermediatesReg: change " << printReg(Base, TRI) + << " to " << printReg(newBase, TRI) << " with addend " + << printReg(Addend, TRI) << " * " << Scale << " after " + << *ldst); + for (auto UI = MRI.use_nodbg_begin(Base), UE = MRI.use_nodbg_end(); UI != UE; + ++UI) { + MachineInstr *MI = UI->getParent(); + if (MI->getOpcode() == ARC::TBD) continue; - if (!MI->mayLoad() && !MI->mayStore()) + if (ldst == MI || DOM.dominates(MI, ldst)) continue; - if (ARC::getPostIncOpcode(MI->getOpcode()) < 0) + LLVM_DEBUG(dbgs() << "fixIntermediatesReg: " << *MI); + unsigned Opcode = MI->getOpcode(); + unsigned RS9 = ARC::getConversionToRS9(Opcode); + // canFixPastUses and similar checks ensure that MI is either Load or Add + // constant + if (RS9 != 0) { + // ld_rr [Base, Addend] -> ld_rs9 [NewBase, 0] + MI->setDesc(ST.getInstrInfo()->get(RS9)); + MI->getOperand(MI->mayLoad() ? 1 : 0).setReg(newBase); + MI->getOperand(MI->mayLoad() ? 2 : 1).ChangeToImmediate(0); + LLVM_DEBUG(dbgs() << "Changed to " << *MI); continue; - MachineInstr *Res = tryToCombine(*MI); - if (Res) { - Changed = true; - // Res points to the next instruction. 
Rewind to process it - MI = std::prev(Res->getIterator()); + } + + // Perform 'poor man' copy propagation on tied registers + bool done = false; + unsigned AddScale = isAddRRR(Opcode); + assert(AddScale && "Unexpected opcode"); + + Register OffReg = MI->getOperand(2).getReg(); + if (OffReg != Addend) { + // Only possible if on previous iteration we changed e.g. + // ADD2 base, addend; to tmp = MPY addend, 3; ADD base, tmp; + assert(MRI.hasOneNonDBGUser(OffReg)); + MachineInstr *OffDef = getSsaDef(OffReg); // %tmp + assert( + OffDef && + (OffDef->getOpcode() == + ARC::MPY_rru6 /* TODO: || OffDef->getOpcode() == ARC::MPYL_rru6*/)); + assert(OffDef->getOperand(1).getReg() == Addend); + int64_t Factor = OffDef->getOperand(2).getImm() - Scale; // N - Scale + assert(Factor >= 0); + // Change + // %newBase = %Base + %Addend * Scale + // ... + // %tmp = MPY %Addend, N ; %tmp = %Addend * N + // %foo = ADD %Base, %tmp ; %foo = %Base + %Addend * N + if (Factor == 0) { // N == Scale + // to: + // %foo = copy %newBase + OffDef->eraseFromParent(); + MI->setDesc(ST.getInstrInfo()->get(ARC::COPY)); + MI->getOperand(1).setReg(newBase); + MI->RemoveOperand(2); + LLVM_DEBUG(dbgs() << "Changed to " << *MI); + } else if (Factor > 0) { // N > Scale + // to: + // %tmp = MPY %Addend, (N - Scale) ; %tmp = %Addend * (N - Scale) + // %foo = ADD %newBase, %tmp ; %foo = %newBase + %Addend * (N - + // Scale) + // ; = %Base + %Addend * N + OffDef->getOperand(2).setImm(Factor); + MI->getOperand(1).setReg(newBase); + LLVM_DEBUG(dbgs() << "Changed to " << *OffDef << *MI); + } + continue; + } + + do { + AddScale -= Scale; + done = (AddScale == 0); + + if (!done) { + done = true; + auto search = tiedRegisterMap.find(newBase); + if (search != tiedRegisterMap.end()) { + TiedRegIncrement i = search->second; + if (!DOM.dominates(i.def, MI)) { + break; // Unusable + } + if (i.incReg == Addend) { + LLVM_DEBUG(dbgs() << "\tFound map " << printReg(newBase, TRI) + << " --> " << printReg(i.newReg, TRI)); + newBase = i.newReg; + done = false; + } + } + } + } while (!done); + + if (AddScale == 0) { + // TODO: if it is safe to remove this instruction here, then we can: + // ARC::replaceAllUsesWith(MRI, MI->getOperand(0).getReg(), newBase); + + // ADD_rrr Base, Addend -> ADD_rs9 NewBase, 0 (NOP, in fact) + // TODO + // unsigned ADD = ST.isArc64() ? ARC::ADDL_rru6 : ARC::ADD_rru6; + unsigned ADD = ST.isArc64() ? ARC::ADD_rru6 : ARC::ADD_rru6; + MI->setDesc(ST.getInstrInfo()->get(ADD)); + MI->getOperand(1).setReg(newBase); + MI->getOperand(2).ChangeToImmediate(0); + LLVM_DEBUG(dbgs() << "Changed to " << *MI); + continue; + } + + unsigned BaseIdx = 1; + if (ARC::getVReg(MI->getOperand(BaseIdx)) == Addend) + BaseIdx = 2; + + if (AddScale == 2 || AddScale == 4 || AddScale == 8) { + static const unsigned op[2][3] = { + {ARC::ADD1_rrr, ARC::ADD2_rrr, ARC::ADD3_rrr} + // TODO: {ARC::ADD1L_rrr,ARC::ADD2L_rrr,ARC::ADD3L_rrr} + }; + unsigned newOpcode = op[ST.isArc64()][AddScale >> 2]; + + // Add's operands can be swapped: + // %vreg98 = LD_rs9 %vreg2, 0 + // %vreg10 = ADD_rrr %vreg9, %vreg2 + MI->setDesc(ST.getInstrInfo()->get(newOpcode)); + MI->getOperand(BaseIdx).setReg(newBase); + LLVM_DEBUG(dbgs() << "Changed to " << *MI); + } else { + // Max scale is 8 (ADD3). 
+ // Since we decremented it at least once, it must be odd or 6 here + assert((AddScale & 1) || AddScale == 6); + unsigned R = createVirtReg(Addend); + bool mpyOK; + if (ST.isArc64()) { + mpyOK = ST.hasMpy64(); + } else { + mpyOK = ST.hasMpy() && !ST.isDSPv1(); + } + MachineInstr *NewMI = nullptr; + // TODO + // unsigned ADD = ST.isArc64() ? ARC::ADDL_rrr : ARC::ADD_rrr; + unsigned ADD = ST.isArc64() ? ARC::ADD_rrr : ARC::ADD_rrr; + if (mpyOK) { + // unsigned MPY = ST.isArc64() ? ARC::MPYL_rru6 : ARC::MPY_rru6; + unsigned MPY = ST.isArc64() ? ARC::MPY_rru6 : ARC::MPY_rru6; + NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + ST.getInstrInfo()->get(MPY), R) + .addReg(Addend) + .addImm(AddScale); + } else if (AddScale == 3 || AddScale == 5) { + unsigned ADD_SCALED; + if (AddScale == 3) { + // ADD_SCALED = ST.isArc64() ? ARC::ADD1L_rrr : ARC::ADD1_rrr; + ADD_SCALED = ST.isArc64() ? ARC::ADD1_rrr : ARC::ADD1_rrr; + } else { + // ADD_SCALED = ST.isArc64() ? ARC::ADD2L_rrr : ARC::ADD2_rrr; + ADD_SCALED = ST.isArc64() ? ARC::ADD2_rrr : ARC::ADD2_rrr; + } + NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + ST.getInstrInfo()->get(ADD_SCALED), R) + .addReg(Addend) + .addImm(Addend); + } else if (AddScale == 6) { + // unsigned SHL = ST.isArc64() ? ARC::ASLL_rru6 : ARC::ASL_rru6; + unsigned SHL = ST.isArc64() ? ARC::ASL_rru6 : ARC::ASL_rru6; + unsigned T1 = createVirtReg(Addend); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + ST.getInstrInfo()->get(SHL), T1) + .addReg(Addend) + .addImm(2); + // unsigned ADD_SCALED = ST.isArc64() ? ARC::ADD1L_rrr : ARC::ADD1_rrr; + unsigned ADD_SCALED = ST.isArc64() ? ARC::ADD1_rrr : ARC::ADD1_rrr; + NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + ST.getInstrInfo()->get(ADD_SCALED), R) + .addReg(T1) + .addImm(Addend); + } else { + assert(AddScale == 7); + // unsigned SHL = ST.isArc64() ? ARC::ASLL_rru6 : ARC::ASL_rru6; + unsigned SHL = ST.isArc64() ? ARC::ASL_rru6 : ARC::ASL_rru6; + unsigned T = createVirtReg(Addend); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + ST.getInstrInfo()->get(SHL), T) + .addReg(Addend) + .addImm(3); + // unsigned SUB = ST.isArc64() ? ARC::SUBL_rru6 : ARC::SUB_rru6; + unsigned SUB = ST.isArc64() ? ARC::SUB_rru6 : ARC::SUB_rru6; + NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + ST.getInstrInfo()->get(SUB), R) + .addImm(T) + .addReg(Addend); + } + MI->setDesc(ST.getInstrInfo()->get(ADD)); + MI->getOperand(BaseIdx).setReg(newBase); + MI->getOperand((BaseIdx == 1) ? 
2 : 1).setReg(R); + LLVM_DEBUG(dbgs() << "Changed to " << *NewMI << *MI); } } - return Changed; } -bool ARCOptAddrMode::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction()) || KILL_PASS()) +// Adjust add/load/store instructions (having 'base' as base operand) after ldst +// by delta 'newOffset' +void ARC::ABAW::fixIntermediates(MachineInstr *ldst, unsigned newBase, + int64_t newOffset, unsigned oldBase) { + + MachineRegisterInfo::use_nodbg_iterator UINext; + for (auto UI = MRI.use_nodbg_begin(oldBase), UE = MRI.use_nodbg_end(); + UI != UE; UI = UINext) { + MachineInstr *MI = UI->getParent(); + UINext = ++UI; + if (MI->getOpcode() == ARC::TBD) + continue; + if (DOM.dominates(MI, ldst) || (ldst == MI)) { + continue; + } + int64_t amount; + if (isAddConstantOp(*MI, &amount)) { + LLVM_DEBUG(dbgs() << "[PREREGCOMBINE] Add constant to be adjusted by " + << newOffset << ": " << *MI); + MachineBasicBlock::iterator InsertPoint(MI); + MachineInstr *NewAdd = ARC::buildAddByConstant( + MRI, *(MI->getParent()), InsertPoint, MI->getDebugLoc(), + MI->getOperand(0).getReg(), newBase, amount - newOffset); + toBeDeleted(MI); + LLVM_DEBUG(dbgs() << "[PREREGCOMBINE] ... to produce: " << *NewAdd); + continue; + } + + unsigned baseIdx = 0, offsetIdx = 1; + // unsigned Opcode = MI->getOpcode(); + // getBaseAndOffsetPosition does not work for PREFETCH + // neither does it works for LDD/STD + + // PREFETCH does not have MayLoad attribute in .td + if (MI->mayLoad()) { + baseIdx = 1; + offsetIdx = 2; + } + + if (MI->getOperand(baseIdx).isReg() && + (MI->getOperand(baseIdx).getReg() == oldBase)) { + assert(MI->getOperand(offsetIdx).isImm()); + LLVM_DEBUG(dbgs() << "[PREREGCOMBINE] Offset to be adjusted by " + << newOffset << ": " << *MI); + MI->getOperand(offsetIdx).setImm(MI->getOperand(offsetIdx).getImm() - + newOffset); + MI->getOperand(baseIdx).setReg(newBase); + LLVM_DEBUG(dbgs() << "[PREREGCOMBINE] ... to produce: " << *MI); + } else { + LLVM_DEBUG(if (!MI->getOperand(baseIdx).isReg()) dbgs() + << "Operand at " << baseIdx << " is not a register in " + << *MI; + else dbgs() << "ERROR: Ldst base = " << printReg(oldBase, TRI) + << " does not match operand at pos " << baseIdx + << " in " << *MI); + } + } +} + +void ARC::ABAW::clear() { + Candidates.clear(); + Increments.clear(); + ExternalDominatedUses.clear(); + ExternalDominatingUses.clear(); + GeneratePreInc = false; +} + +bool ARC::ABAW::collectCandidates(unsigned BaseReg, MachineBasicBlock *MBB) { + LLVM_DEBUG(dbgs() << "collectCandidates: BaseReg = " << printReg(BaseReg, TRI) + << "\n"); + + clear(); + + // Keep track of vector candidates in inner loops. only + // permit a single vector candidate under -vdsp-inner-post-inc=single. + bool seenVectorLDST = false; + const bool limitVectorLDST = + InSWPCandidate && + false /*TODO VDSPInnerLoopMode == InnerLoopMode::SINGLE*/; + const bool disableVectorLDST = + InSWPCandidate && + false /*TODO VDSPInnerLoopMode == InnerLoopMode::DISABLED*/; + + for (MachineInstr &MI : MRI.use_nodbg_instructions(BaseReg)) { + MachineBasicBlock *B = MI.getParent(); + if (B != MBB) { + + if (DOM.dominates(MBB, B)) + ExternalDominatedUses.push_back(&MI); + else if (DOM.dominates(B, MBB)) + ExternalDominatingUses.push_back(&MI); + else + return false; + + continue; + } + bool isVector = ARC::isVectorInstr(MI.getDesc().TSFlags); + int64_t amount; + if (getConversionToAB(MI) != 0 || + ARC::getConversionFromRRToRRAB(MI.getOpcode()) != 0) { + unsigned BaseIdx = MI.mayLoad() ? 
1 : 0; + unsigned OffIdx = BaseIdx + 1; + if (ARC::getVReg(MI.getOperand(BaseIdx)) != BaseReg) { + // LD %off, %base + if (ARC::getVReg(MI.getOperand(OffIdx)) == BaseReg) + std::swap(BaseIdx, OffIdx); + else + return false; + } + MachineOperand &Off = MI.getOperand(OffIdx); + if (MI.getDesc().mayStore()) { // ignore AGU XY operands + // FIXME: + // 1) verify other similar cases with unexpected duplicate registers + // (Offset?) 2) try to optimize instead of skipping + // + // STAR 9001396665 + // st_s %r0,[%r0,12] + // st_s %r0,[%r0,8] + // add_s %r0,%r0,16 + + unsigned SrcIdx = OffIdx + 1; + if (SrcIdx < MI.getNumExplicitOperands()) { + MachineOperand &Src = MI.getOperand(SrcIdx); + if (Src.isReg() && Src.getReg() == BaseReg) { + LLVM_DEBUG( + dbgs() + << "Can't optimize: STORE Source register matches BaseReg\n"); + return false; + } + } + } + if (Off.isImm()) { + if (!(isVector && + ((limitVectorLDST && seenVectorLDST) || disableVectorLDST))) { + BaseIncr BI(Off.getImm()); + Candidates.push_back(std::make_pair(&MI, BI)); + seenVectorLDST |= isVector; + } + } else if (unsigned Reg = ARC::getVReg(Off)) { + if (!(isVector && + ((limitVectorLDST && seenVectorLDST) || disableVectorLDST))) { + BaseIncr BI(Reg, 1); + Candidates.push_back(std::make_pair(&MI, BI)); + seenVectorLDST |= isVector; + } + } else { + return false; + } + } else if (unsigned Scale = getLoadScale(MI.getOpcode(), ST)) { + unsigned OffIdx = MI.mayLoad() ? 2 : 1; + BaseIncr BI(MI.getOperand(OffIdx).getReg(), Scale); + if (BI.getReg() == BaseReg) + return false; // Scaled loads are not commutative + if (!(isVector && + ((limitVectorLDST && seenVectorLDST) || disableVectorLDST))) { + Candidates.push_back(std::make_pair(&MI, BI)); + seenVectorLDST |= isVector; + } + } else if (isAddConstantOp(MI, &amount)) { + BaseIncr BI((int64_t)amount); + Increments.push_back(std::make_pair(&MI, BI)); + } else if (unsigned Scale = isAddRRR(MI)) { + unsigned OffReg = ARC::getVReg(MI.getOperand(2)); + if (OffReg == BaseReg) { + // TODO + if (MI.getOpcode() != + ARC::ADD_rrr /*&& MI.getOpcode() != ARC::ADDL_rrr*/) + return false; + OffReg = ARC::getVReg(MI.getOperand(1)); + } + if (OffReg == 0) { + // Bail out on physical registers + return false; + } + BaseIncr BI(OffReg, Scale); + Increments.push_back(std::make_pair(&MI, BI)); + } else { + return false; + } + } + + if (Candidates.empty()) { + LLVM_DEBUG(dbgs() << "\tno candidate loads found\n"); return false; + } - if (DUMP_BEFORE()) - MF.dump(); - if (VIEW_BEFORE()) - MF.viewCFG(); + // Make sure instructions are lexically ordered + std::sort(Candidates.begin(), Candidates.end(), + [this](InstIncrPair a, InstIncrPair b) { + return Ordinals[a.first] < Ordinals[b.first]; + }); + // Use lambda so clang-format can do a decent job with it + auto dumpIt = [&]() { + dbgs() << "Candidates:\n"; + for (auto &C : Candidates) { + dbgs() << Ordinals[C.first] << ": " << *C.first << "\t"; + C.second.print(dbgs(), TRI); + dbgs() << "\n"; + } + dbgs() << "Increments:\n"; + for (auto &I : Increments) { + dbgs() << Ordinals[I.first] << ": " << *I.first << "\t"; + I.second.print(dbgs(), TRI); + dbgs() << "\n"; + } + dbgs() << "ExternalDominatedUses:\n"; + for (auto &EU : ExternalDominatedUses) { + dbgs() << *EU; + } + dbgs() << "ExternalDominatingUses:\n"; + for (auto &EU : ExternalDominatingUses) { + dbgs() << *EU; + } + }; + LLVM_DEBUG(dumpIt()); + return true; +} - AST = &MF.getSubtarget(); - AII = AST->getInstrInfo(); - MRI = &MF.getRegInfo(); - MDT = &getAnalysis(); +bool 
ARC::ABAW::analyzeCandidates(unsigned BaseReg, MachineBasicBlock *MBB) { + LLVM_DEBUG(dbgs() << "analyzeCandidates: BaseReg = " << printReg(BaseReg, TRI) + << "\n"); - bool Changed = false; - for (auto &MBB : MF) - Changed |= processBasicBlock(MBB); + // TODO: eh? try to combine to single increment? + if (Increments.size() > 1) { + LLVM_DEBUG(dbgs() << "\tmultiple increments found; not yet implemented\n"); + return false; + } - if (DUMP_AFTER()) - MF.dump(); - if (VIEW_AFTER()) - MF.viewCFG(); - return Changed; + if (Increments.empty()) { + // TODO: Increment might be found in ExternalDominatedUses. + // Consider loop with if statement with load and increment in different + // blocks + LLVM_DEBUG( + dbgs() << "\tno increments found in block; not yet implemented\n"); + return false; + } + + bool isAllImm = Increments.back().second.IsImm; + bool isAllReg = !isAllImm; + bool seenStore = false; + unsigned R = Increments.back().first->getOperand(0).getReg(); + for (auto &C : Candidates) { + bool IsImm = C.second.IsImm; + // LD_rs9, %Base, 0 works for both cases + isAllImm &= IsImm; + isAllReg &= (!IsImm || C.second.getImm() == 0); + if (!IsImm && !Register::isVirtualRegister(C.second.getReg())) + return false; + if (C.first->mayStore()) { + unsigned ValReg = ARC::getVReg(C.first->getOperand(2)); + // prohibit this: + // v1 = add v0, c + // st [v0, 0], v1 + if (ValReg == R) + return false; + seenStore = true; + } + } + + if (!isAllImm && !isAllReg) { + if (Candidates.size() == 1 && (!seenStore || Increments[0].second.IsImm) && + tryToTransformPHI(BaseReg, MBB)) { + isAllImm = Increments[0].second.IsImm; + isAllReg = !isAllImm; + } else { + LLVM_DEBUG(dbgs() << "\tirregular offsets detected\n"); + return false; + } + } + if (!isAllImm && seenStore) { + LLVM_DEBUG(dbgs() << "\tstore instruction cannot handle register offset\n"); + return false; + } + + MachineInstr *Incr = Increments.back().first; + + // Can we sink increment past last use? + MachineInstr *Last = Candidates.back().first; + if (Ordinals[Incr] < Ordinals[Last]) { + if (noUseOfAddBeforeLoadOrStore(Incr, Last)) { + // EMPTY + } else if (canHoistLoadStoreTo(Last, Incr) && Candidates.size() == 1) { + // TODO: can we handle Candidates.size() > 1 case? + Last->removeFromParent(); + MBB->insertAfter(Incr, Last); + LLVM_DEBUG(dbgs() << "\tinstruction " << *Last << "\twas moved to " + << *Incr); + } else { + // TODO: Add everything after increment to ExternalDominatedUses and try + // to combine + LLVM_DEBUG(dbgs() << "\tcannot sink increment after last use\n"); + return false; + } + } + + // Append increments to the candidate list for simpler analysis + Candidates.insert(Candidates.end(), Increments.begin(), Increments.end()); + + return isAllImm ? 
analyzeCandidatesImm(BaseReg, MBB) + : analyzeCandidatesReg(BaseReg, MBB); +} + +bool ARC::ABAW::analyzeCandidatesImm(unsigned BaseReg, MachineBasicBlock *MBB) { + LLVM_DEBUG(dbgs() << "analyzeCandidatesImm: BaseReg = " + << printReg(BaseReg, TRI) << "\n"); + // Cannot handle multiple increments + if (Increments.size() != 1) { + LLVM_DEBUG(dbgs() << "\tmultiple increments case is not handled\n"); + return false; + } + + MachineInstr *Incr = Increments[0].first; + + if (!ExternalDominatedUses.empty() && + !canFixPastUses(ExternalDominatedUses, Incr, BaseReg)) { + return false; + } + + if (Candidates[0].second.getImm() != 0) { + // Try to generate preinc instruction first + if (tryToGenPreIncImm()) + return true; + } + + if (!checkCandidatesImm()) { + // Try to reorder memory instrustions to get foldable chain + if (!tryToReorderCandidatesImm() || !checkCandidatesImm()) { + return false; + } + } + + if (Candidates[0].second.getImm() != 0) { + if (!tryToTransformPHI(BaseReg, MBB)) { + if (!tryToReorderCandidatesImm() || !tryToTransformPHI(BaseReg, MBB)) + return false; + } + } + + // TODO: this assert was put in assumption is that checkCandidatesImm does not + // depend on PHI transformation. But in fact, it does. Consider: + // LD %0, -68 + // LD %0, -4 + // ADD %0, 8 + // Before PHI transform, checkCandidatesImm would return true (as offset + // deltas are all positive), but after transfor it will turn into + // LD %1, 0 + // LD %1, 64 + // ADD %1, 8 + // Second LD would need negative offset, so checkCandidatesImm will return + // false at -Os Need to fix checkCandidatesImm to handle non-zero first offset + // TODO: In the above, only first load needs to be changed to LD_ab + // testcase: audio_codecs/sbc_decoder/src/decoder/sbc_decoder.c @ -Os + // hs45d_voice_audio assert(checkCandidatesImm() && "Unxepected unfoldable + // chain detected"); + if (!checkCandidatesImm()) + return false; + + // Update offsets with new offset values for postincrements + size_t N = Candidates.size(); + for (size_t i = 0; i < N - 1; ++i) { + int64_t NewOffset = + Candidates[i + 1].second.getImm() - Candidates[i].second.getImm(); + Candidates[i].second.setImm(NewOffset); + } + + if (ARC::isOptimizeForSpace(MF)) { + // We don't have compact ld/st .ab/.aw instructions + unsigned CompactSave = 0; + for (auto &P : Candidates) { + if (P.second.IsImm && + isUInt<5>(P.second.getImm()) && // TODO check u5/u6/u7 ldub/lduh/ld + mayHaveShortForm(*P.first)) + CompactSave += 2; // Would save 2 bytes over incremented ld/st + } + // TODO/CHECKME check -Wcg,-arc-abaw-max-space=N setting + // Do we want different value for -Os1? 
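+    // Illustrative example: a chain such as
+    //   ld_s  r0,[r1,0] ; ld_s r2,[r1,4] ; add_s r1,r1,8   (three 2-byte insns)
+    // would become two 4-byte post-increment loads
+    //   ld.ab r0,[r1,4] ; ld.ab r2,[r1,4]
+    // Every candidate that could have stayed in a 2-byte short form is
+    // counted above as a 2-byte loss, and the rewrite is abandoned once the
+    // accumulated loss exceeds arc-abaw-max-space.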
+ if (CompactSave > ArcAbawMaxSpace) { + LLVM_DEBUG(dbgs() << "analyzeCandidatesImm: Compact form saves " + << CompactSave << " bytes\n"); + return false; + } + } + + // Remove increment from candidate list + assert(Candidates.back().first == Incr); + Candidates.pop_back(); + return true; +} + +bool ARC::ABAW::analyzeCandidatesReg(unsigned BaseReg, MachineBasicBlock *MBB) { + + LLVM_DEBUG(dbgs() << "analyzeCandidatesReg: BaseReg = " + << printReg(BaseReg, TRI) << "\n"); + if (!Candidates[0].second.IsImm) { + LLVM_DEBUG( + dbgs() << "\tcannot handle reg chain starting with non-zero offset\n"); + return false; + } + + // prune duplicate ldst instructions with zero offset + unsigned LastZeroOff = 0; + for (size_t i = 0; i < Candidates.size(); ++i) { + if (Candidates[i].second.IsImm && Candidates[i].second.getImm() == 0) + LastZeroOff = i; + } + if (LastZeroOff != 0) { + Candidates.erase(Candidates.begin(), Candidates.begin() + LastZeroOff); + } + + bool NeedToPropagateOffsetReg = false; + unsigned OffsetReg = Increments[0].second.getReg(); + MachineInstr *OffDef = getSsaDef(OffsetReg); + if (OffDef == nullptr) + return false; + if (Candidates.size() > 1) { + for (auto &C : Candidates) { + if (OffDef == C.first || !DOM.dominates(OffDef, C.first)) + return false; + if (C.second.IsImm) + continue; + unsigned Reg = C.second.getReg(); + NeedToPropagateOffsetReg |= (Reg != OffsetReg); + } + } + + std::map RegValueMap; + + if (NeedToPropagateOffsetReg) { + // Instructions use different addend registers + // Try to value propagate them to find common value + // E.g. + // %vreg11 = ASL1_rr %vreg5 + // ... + // %vreg90 = LD_rras %vreg44, %vreg5 - Addend %vreg5, scale 4 + // %vreg90 = LD %vreg44, %vreg11 - Addend %vreg11, scale 1; can + // propagate to %vreg5, scale 2 %vreg53 = ADD2_rrr %vreg44, %vreg11 - + // Addend %vreg11, scale 4; can propagate to %vreg5, scale 8 + + for (auto &C : Candidates) { + if (C.second.IsImm) + continue; + RegIncrement RI = {C.second.getReg(), 1}; + + bool done = false; + while (!done) { + unsigned Reg = RI.Reg; + if (MachineInstr *Def = getSsaDef(Reg)) { + done = !updateRegIncrement(*Def, RI); + } else { + done = true; + } + } + unsigned OrigReg = C.second.getReg(); + if (OrigReg != RI.Reg) { + RegValueMap[OrigReg] = RI; + LLVM_DEBUG(dbgs() << "\tRecording " << printReg(OrigReg, TRI) << " -> " + << printReg(RI.Reg, TRI) << " * " << RI.Scale + << "\n"); + RI.Scale *= C.second.getScale(); + C.second.getRegIncrement() = RI; + } + } + LLVM_DEBUG(dbgs() << "Candidates after value propagation:\n"; + for (auto &C + : Candidates) { + dbgs() << *C.first << "\t"; + C.second.print(dbgs(), TRI); + dbgs() << "\n"; + }); + } + + // All instruction in chain must have same offset register with uniform stride + size_t N = Candidates.size(); + unsigned CommonAddend = Candidates[1].second.getReg(); + int64_t Delta = Candidates[1].second.getScale(); + for (size_t i = 2; i < N; ++i) { + if (Candidates[i].second.getReg() != CommonAddend) { + LLVM_DEBUG(dbgs() << "\tcommon addend register not found[" << i + << "]: " << *Candidates[i].first); + return false; + } + int64_t D = + Candidates[i].second.getScale() - Candidates[i - 1].second.getScale(); + if (D != Delta) { + LLVM_DEBUG(dbgs() << "\tnon-uniform delta[" << i << "]: " << D << "\n"); + return false; + } + } + + if (Delta != 1) { + // Try to find existing vreg with necessary value + LLVM_DEBUG(dbgs() << "Look for existing vreg for " + << printReg(CommonAddend, TRI) << " * " << Delta << "\n"); + for (auto &I : RegValueMap) { + RegIncrement RI 
= I.second; + if (RI.Reg == CommonAddend && (int64_t)RI.Scale == Delta) { + LLVM_DEBUG(dbgs() << "\tFound suitable vreg " << printReg(I.first, TRI) + << ": " << printReg(RI.Reg, TRI) << " * " << RI.Scale + << "\n"); + CommonAddend = I.first; + Delta = 1; + break; + } + } + } + + // TODO: We can try to move instructions around + MachineInstr *Def = getSsaDef(CommonAddend); + if (!Def || !DOM.dominates(Def, Candidates[0].first)) { + LLVM_DEBUG( + dbgs() << "Def of addend register does not dominate all mem instrs\n"); + return false; + } + + if (!ExternalDominatedUses.empty() && + !canFixPastUses(ExternalDominatedUses, Increments[0].first, BaseReg)) { + LLVM_DEBUG(dbgs() << "\tcannot fix past uses \n"); + return false; + } + + if (Delta != 1) { + // Cannot find existing vreg holding CommonAddend * Delta + // Generate new instruction + unsigned R = createVirtReg(CommonAddend); + MachineInstr *CommonAddendDef = getSsaDef(CommonAddend); + LLVM_DEBUG(dbgs() << "\tCommonAddendDef: " << *CommonAddendDef); + if (CommonAddendDef->getParent() == MBB) { + LLVM_DEBUG( + dbgs() + << "Creating new instruction in the same MBB is unprofitable\n"); + return false; + } + MachineBasicBlock *InsBB = CommonAddendDef->getParent(); + MachineBasicBlock::iterator I(CommonAddendDef); + ++I; + while (I != InsBB->end() && I->getOpcode() == ARC::PHI) + ++I; + MachineInstr *NewAddend = nullptr; + if (isPowerOf2_64(Delta)) { + unsigned Shift = Log2_64(Delta); + // unsigned ShiftOP = ST.isArc64() ? ARC::ASLL_rru6 : ARC::ASL_rru6; + unsigned ShiftOP = ST.isArc64() ? ARC::ASL_rru6 : ARC::ASL_rru6; + NewAddend = BuildMI(*InsBB, I, CommonAddendDef->getDebugLoc(), + ST.getInstrInfo()->get(ShiftOP), R) + .addReg(CommonAddend) + .addImm(Shift); + } else { + if (!ST.hasMpy()) { + LLVM_DEBUG(dbgs() << "Target has no MPY option, cannot generate " + "required instruction\n"); + return false; + } + if (ST.isArc64() && !ST.hasMpy64()) { + LLVM_DEBUG(dbgs() << "Target has no MPY option, cannot generate " + "required instruction\n"); + return false; + } + if (ST.isDSPv1()) { + LLVM_DEBUG(dbgs() << "Cannot use MPY instruction in DSPv1 ISA\n"); + return false; + } + // unsigned MPY = ST.isArc64() ? ARC::MPYL_rru6 : ARC::MPY_rru6; + unsigned MPY = ST.isArc64() ? ARC::MPY_rru6 : ARC::MPY_rru6; + NewAddend = BuildMI(*InsBB, I, CommonAddendDef->getDebugLoc(), + ST.getInstrInfo()->get(MPY), R) + .addReg(CommonAddend) + .addImm(Delta); + } + LLVM_DEBUG(dbgs() << "\tCreated new instr for addend: " << *NewAddend); + CommonAddend = R; + } + + for (auto &C : Candidates) { + C.second.IsImm = false; + C.second.setReg(CommonAddend); + C.second.setScale(1); + } + + // Remove increment from candidate list + assert(Candidates.back().first == Increments.back().first); + Candidates.pop_back(); + return true; +} + +static unsigned getFormRRAB(unsigned op) { + unsigned NewOpcode = ARC::getConversionToRRAB(op); + if (NewOpcode == 0) + NewOpcode = ARC::getConversionFromScaledToRRAB(op); + if (NewOpcode == 0) + NewOpcode = ARC::getConversionFromRRToRRAB(op); + return NewOpcode; +} + +// Verify all "Candidates" have increment forms +bool ARC::ABAW::validateIncrementForms() const { + // Check first that we can transform all candidates. The ARCv3 FPU + // load/store instructions don't have all the normal addressing forms. + // Must match logic of loop in transformCandidates() + for (const auto &C : Candidates) { + MachineInstr *MI = C.first; + if (C.second.IsImm) { + unsigned NewOpcode = + GeneratePreInc ? 
getConversionToAW(*MI) : getConversionToAB(*MI); + if (NewOpcode == 0) + return false; + } else { + if (getFormRRAB(MI->getOpcode()) == 0) + return false; + } + } + return true; +} + +bool ARC::ABAW::transformCandidates(unsigned BaseReg, MachineBasicBlock *MBB) { + LLVM_DEBUG(dbgs() << "transformCandidates: BaseReg = " + << printReg(BaseReg, TRI) << "\n"); + unsigned PrevBaseReg = BaseReg; + unsigned NewBaseReg = 0; + + // Keep loop in sync with duplicate logic in validateIncrementForms() + for (auto &C : Candidates) { + MachineInstr *MI = C.first; + LLVM_DEBUG(dbgs() << "Change " << *MI); + NewBaseReg = MRI.createVirtualRegister(MRI.getRegClass(BaseReg)); + if (C.second.IsImm) { + unsigned NewOpcode = + GeneratePreInc ? getConversionToAW(*MI) : getConversionToAB(*MI); + assert(NewOpcode != 0 && "Cannot find opcode for register postincrement"); + int64_t NewOffset = C.second.getImm(); + if (NewOffset == 0) { + MI->getOperand(MI->mayLoad() ? 1 : 0).setReg(PrevBaseReg); + MI->getOperand(MI->mayLoad() ? 2 : 1).setImm(0); + continue; + } + // TODO + // if (unsigned AVOpcode = getVDSPAVVariant(NewOpcode)) { + // int ShiftAmt = Log2_32(ST.getVDSPVecWidth()/8); + // int64_t ShiftedVal = NewOffset >> ShiftAmt; + // if ((ShiftedVal << ShiftAmt) == NewOffset && (ShiftedVal <= 31 && + // ShiftedVal >= -32)) { + // NewOpcode = AVOpcode; + // NewOffset = ShiftedVal; + // } + // } + MI->setDesc(ST.getInstrInfo()->get(NewOpcode)); + setLoadStoreBaseOffset(*MI, NewBaseReg, PrevBaseReg, NewOffset); + } else { + unsigned NewOpcode = getFormRRAB(MI->getOpcode()); + assert(NewOpcode != 0 && "Cannot find opcode for register postincrement"); + MI->setDesc(ST.getInstrInfo()->get(NewOpcode)); + MachineOperand Opnd = MachineOperand::CreateReg(C.second.getReg(), false); + setLoadStoreBaseOffset(*MI, NewBaseReg, PrevBaseReg, Opnd); + tiedRegisterMap[BaseReg] = {NewBaseReg, Opnd.getReg(), MI}; + } + PrevBaseReg = NewBaseReg; + LLVM_DEBUG(dbgs() << "To " << *MI); + } + + // Replace Inc's result with last created new base register + assert(Increments.size() == 1); + MachineInstr *Incr = Increments[0].first; + toBeDeleted(Incr); + if (!ExternalDominatedUses.empty()) { + if (Increments[0].second.IsImm) { + fixIntermediates(Candidates.back().first, PrevBaseReg, + Increments[0].second.getImm(), BaseReg); + } else { + fixIntermediatesReg( + Candidates.back().first, // after this insns + PrevBaseReg, // use this as a new base + Increments[0].second.getReg(), // adjust by this reg + Increments[0].second.getScale(), // scaled by this value + BaseReg); // original base reg + } + } + ARC::replaceAllUsesWith(MRI, Incr->getOperand(0).getReg(), PrevBaseReg); + + return true; } //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// -FunctionPass *llvm::createARCOptAddrMode() { return new ARCOptAddrMode(); } +FunctionPass *llvm::createARCOptAddrMode(const ARCSubtarget &ST) { + return new ARCOptAddrMode(ST); +} Index: llvm/lib/Target/ARC/ARCSubtarget.h =================================================================== --- llvm/lib/Target/ARC/ARCSubtarget.h +++ llvm/lib/Target/ARC/ARCSubtarget.h @@ -63,6 +63,18 @@ } bool hasNorm() const { return Xnorm; } + + // TODO: make configurable + bool isDSPv1() const { return false; } + + // TODO: make configurable + bool hasMpy() const { return true; } + + // TODO add ARC64 support + bool isArc64() const { return false; } + + // TODO: make configurable + 
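+  // Reports whether a 64-bit multiply is available; ARCOptAddrMode checks
+  // this before synthesizing an MPY when targeting ARC64.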
bool hasMpy64() const { return false; } }; } // end namespace llvm Index: llvm/lib/Target/ARC/ARCTargetMachine.h =================================================================== --- llvm/lib/Target/ARC/ARCTargetMachine.h +++ llvm/lib/Target/ARC/ARCTargetMachine.h @@ -31,6 +31,7 @@ CodeGenOpt::Level OL, bool JIT); ~ARCTargetMachine() override; + const ARCSubtarget &getSubtarget() const { return Subtarget; } const ARCSubtarget *getSubtargetImpl() const { return &Subtarget; } const ARCSubtarget *getSubtargetImpl(const Function &) const override { return &Subtarget; Index: llvm/lib/Target/ARC/ARCTargetMachine.cpp =================================================================== --- llvm/lib/Target/ARC/ARCTargetMachine.cpp +++ llvm/lib/Target/ARC/ARCTargetMachine.cpp @@ -47,9 +47,12 @@ /// ARC Code Generator Pass Configuration Options. class ARCPassConfig : public TargetPassConfig { +private: + const ARCSubtarget &ST; + public: ARCPassConfig(ARCTargetMachine &TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM), ST(TM.getSubtarget()) {} ARCTargetMachine &getARCTargetMachine() const { return getTM(); @@ -74,8 +77,8 @@ void ARCPassConfig::addPreEmitPass() { addPass(createARCBranchFinalizePass()); } void ARCPassConfig::addPreRegAlloc() { - addPass(createARCExpandPseudosPass()); - addPass(createARCOptAddrMode()); + addPass(createARCExpandPseudosPass()); + addPass(createARCOptAddrMode(ST)); } // Force static initialization. Index: llvm/lib/Target/ARC/ARCUtil.h =================================================================== --- /dev/null +++ llvm/lib/Target/ARC/ARCUtil.h @@ -0,0 +1,382 @@ +//===- ARCUtil.h ----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file defines utility functions and classes used throughout +/// the ARC code generator. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARC_ARCUTIL_H +#define LLVM_LIB_TARGET_ARC_ARCUTIL_H + +#include "ARC.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +namespace llvm { +#if 0 +} fix emacs; +#endif + +class MachineDominatorTree; + +namespace ARC { +#if 0 +} fix emacs; +#endif + +// Determine if the MachineFunction should be optimized for code size +// over performance +extern bool isOptimizeForSpace(const MachineFunction &); + +// Return the virtual Register number when the operand is a virtual +// register otherwise return zero +inline Register getVReg(const MachineOperand &o) { + if (!o.isReg()) + return 0; + auto R = o.getReg(); + if (!Register::isVirtualRegister(R)) + return 0; + return R; +} + +// Return the physical Register number when the operand is a physical +// register otherwise return zero +inline Register getPReg(const MachineOperand &o) { + if (!o.isReg()) + return 0; + auto R = o.getReg(); + if (!Register::isPhysicalRegister(R)) + return 0; + return R; +} + +// Determine if the given virtual Register is unused (a debug info use +// is excluded). False is returned if the Register provided is not a +// virtual Register. 
+static inline bool isUnusedVReg(const MachineRegisterInfo &MRI, Register R) { + if (!Register::isVirtualRegister(R)) + return false; + return MRI.use_nodbg_empty(R); +} + +// Determine if the given virtual Register is unused (a debug info use +// is excluded). False is returned if the Register provided is not a +// virtual Register. +static inline bool isUnusedVReg(const MachineRegisterInfo &MRI, + const MachineOperand &opd) { + if (!opd.isReg()) + return false; + return isUnusedVReg(MRI, opd.getReg()); +} + +// Find the unique definition of a virtual Register else return +// nullptr +static inline MachineInstr *getVRegDef(const MachineRegisterInfo &MRI, + Register RegNo) { + if (!Register::isVirtualRegister(RegNo)) + return nullptr; + if (!MRI.hasOneDef(RegNo)) + return nullptr; + return MRI.getVRegDef(RegNo); +} + +// Find the unique definition of a virtual Register else return +// nullptr +static inline MachineInstr *getVRegDef(const MachineRegisterInfo &RINFO, + const MachineOperand &o) { + if (!o.isReg()) + return nullptr; + if (o.isUndef()) + return nullptr; + return getVRegDef(RINFO, o.getReg()); +} + +// Create a new virtual register that has the same register class as "virtReg" +extern Register createVirtReg(MachineRegisterInfo *, Register virtReg); + +// Create a new virtual register that has the same register class as +// "virtRegOpd" +extern Register createVirtReg(MachineRegisterInfo *, const MachineOperand &); + +// Find the constant value of the operand +extern bool getImmed(MachineRegisterInfo &MRI, const MachineOperand &opd, + int64_t &imm); + +// is "MI" a load or store that can handle "offset" as a displacement? +bool isLoadStoreThatCanHandleDisplacement(const MachineInstr &MI, + int64_t offset); + +// Determine if MI is an add instruction with a constant operand +bool isAddConstantOp(const MachineInstr &MI, int64_t *amount = nullptr); + +// Insert an add-by-constant, using the most efficient 32-bit opcode. +MachineInstr *buildAddByConstant(const MachineRegisterInfo &, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertPoint, + DebugLoc DL, Register DestReg, Register SrcReg, + int constant); + +// Returns true if machine instruction MI dominates all uses of virtual register +// VReg +bool dominatesAllUsesOf(MachineInstr *, Register VReg, MachineDominatorTree *, + MachineRegisterInfo *); + +// Is "reg" read between MI (exclusive) and ME (exclusive)? +// Should work on virtual or physical registers. +// If "stopIfModified" is true, then return false if the (physical) register +// is modified before being read. +bool isUsedBetween(Register reg, MachineBasicBlock::const_iterator MI, + MachineBasicBlock::const_iterator ME, + bool isExclusiveMI = true, bool stopIfTrashed = false); + +// Replace all uses of "FromReg" to "ToReg". 
Does not modify defs +void replaceAllUsesWith(MachineRegisterInfo &, Register FromReg, + Register ToReg); + +// A visitor pattern class used throughout the ARC code generator to +// write optimizations in a structured way +class InstructionVisitor { +public: + enum IterationDirection { Forward, Reverse }; + +protected: + MachineFunction &MF; + const ARCSubtarget &ST; + const TargetRegisterInfo *TRI; + MachineRegisterInfo &MRI; + bool changed = false; // Records if any changes have been made to MF + bool quit = false; // Records that the user wants to stop the iteration + + // A container to hold instructions which will be deleted upon + // completion of iterating over a MachineBasicBlock + SmallVector tbd; + + // Deletes each MachineInstr in container "tbd" + void deletePending(); + + // Register an instruction to be deleted by a later call to "deletePending()". + // If the instruction is not going to be referenced later, + // then pass "clearOperands" as true so that there are no longer any + // register references to the soon-to-be-deleted instruction. + void toBeDeleted(MachineInstr *MI, bool ClearOperands = false); + + // Iterates and calls the visitor methods for each MachineInstr in + // the MachineBasicBlock in forward direction [first..last] + bool iterateBlockForward(MachineBasicBlock &); + + // Iterates and calls the visitor methods for each MachineInstr in + // the MachineBasicBlock in reverse direction [last..first] + bool iterateBlockBackward(MachineBasicBlock &); + + // Iterates and calls the visitor methods for each MachineInstr in + // the MachineBasicBlock in the specified direction. It will repeat + // up to \p maxRepetitions times but terminates if no changes were + // made. + bool iterateBlock(MachineBasicBlock &, IterationDirection, + unsigned maxRepetitions); + + // Iterates over every MachineInstr in every MachineBasicBlock and + // calls the visitor methods in the specified direction. It will + // repeat up to \p maxRepetitions times but terminates if no changes + // were made. + bool iterate(IterationDirection, unsigned maxRepetitions); + + // Handles calling visit() and the pre/post visit callouts as well + // as maintaining the "changed" member variable + bool visitOneInstruction(MachineInstr &); + +public: + InstructionVisitor(const ARCSubtarget &, MachineFunction &); + virtual ~InstructionVisitor() {} + + // This is the main method expected to be overriden by the user. It + // will be called for each MachineInstr in each MachineBasicBlock in + // the IterationDirection specified by the user. The returned value + // should be "true" if any changes where made to the + // MachineFunction. 
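+  // Illustrative usage (hypothetical pass, not part of this patch):
+  //   struct CountLoads : ARC::InstructionVisitor {
+  //     unsigned NumLoads = 0;
+  //     CountLoads(const ARCSubtarget &ST, MachineFunction &MF)
+  //         : InstructionVisitor(ST, MF) {}
+  //     bool visit(MachineInstr &MI) override {
+  //       if (MI.mayLoad())
+  //         ++NumLoads;
+  //       return false; // nothing in MF was modified
+  //     }
+  //   };
+  //   // CountLoads(ST, MF).examineEachInstr() walks every instruction.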
+ virtual bool visit(MachineInstr &) { return false; } + + // Determine if any changes have been made to the MachineFunction + bool anyChanges() const { return changed; } + + // Return "true" if the specified MachineBasicBlock should be + // excluded from the current iteration + virtual bool excludeBlock(MachineBasicBlock &) { return false; } + + // Specify that the iteration should terminate immediately + void halt() { quit = true; } + + // Visitor which is called prior to iterating over the specified + // MachineBasicBlock + virtual void preBlockCallout(MachineBasicBlock &) {} + + // Visitor which is called after iterating over the specified + // MachineBasicBlock + virtual void postBlockCallout(MachineBasicBlock &, bool BlockWasChanged) {} + + // Visitor which is called prior to calling visit() for the + // specified MachineInstr + virtual void preVisitCallout(MachineInstr &) {} + + // Visitor which is called after to calling visit() for the + // specified MachineInstr + virtual void postVisitCallout(MachineInstr &, bool) {} + + // Examine each instruction in the given basic block in forward order. + // The callback/visitor is called after advancing the iterator, so it + // is allowed to change/delete the current instruction, but it is + // unsafe to delete instructions forward of the one being visited. + bool examineEachInstr(MachineBasicBlock &MBB) { + return iterateBlock(MBB, Forward, 1); + } + + // Examine each instruction in the given basic block in reverse order. + // The callback/visitor is called after advancing the iterator, so it + // is allowed to change/delete the current instruction, but it is + // unsafe to delete instructions before of the one being visited. + bool examineEachInstrInReverse(MachineBasicBlock &MBB) { + return iterateBlock(MBB, Reverse, 1); + } + + // NOTE: The callback/visitor is called after advancing the iterator, + // so it is allowed to change/delete the current instruction, but it + // is unsafe to delete instructions before of the one being visited. + bool examineEachInstrInReverse() { return iterate(Reverse, 1); } + + // NOTE: The callback/visitor is called after advancing the iterator, + // so it is allowed to change/delete the current instruction, but it + // is unsafe to delete instructions before of the one being visited. + bool examineEachInstrInReverseRepeatedly(unsigned maxRepetitions) { + return iterate(Reverse, maxRepetitions); + } + + // NOTE: The callback/visitor is called after advancing the iterator, + // so it is allowed to change/delete the current instruction, but it + // is unsafe to delete instructions forward of the one being visited. + bool examineEachInstr() { return iterate(Forward, 1); } + + // NOTE: The callback/visitor is called after advancing the iterator, + // so it is allowed to change/delete the current instruction, but it + // is unsafe to delete instructions forward of the one being visited. + bool examineEachInstrRepeatedly(unsigned maxRepetitions) { + return iterate(Forward, maxRepetitions); + } + + // Attempt to compute the value of the specified operand. If + // successful the value is written to the output int64_t parameter + // and "true" is returned. + virtual bool getImmed(const MachineOperand &, + /*Output*/ int64_t &) const; + + // Attempt to compute the value of the specified operand. If + // successful and the known value matches \p val return "true". 
+ bool isImmedVal(const MachineOperand &opd, int64_t val) const { + int64_t n; + return getImmed(opd, n) && n == val; + } + + // Getter for getting the bitcode Function for the current + // MachineFunction + Function &getFunction() const { return MF.getFunction(); } + +}; // end class ARC::InstructionVisitor + +// ARC::InstructionVisitor for a MachineFunction prior to register +// allocation. It provides additional functionality that pertains to a +// MachineFunction prior to register allocation (while in SSA form). +class SsaInstructionVisitor : public InstructionVisitor { +public: + SsaInstructionVisitor(const ARCSubtarget &ST, MachineFunction &MF) + : InstructionVisitor(ST, MF) {} + virtual ~SsaInstructionVisitor() {} + + // Find the SSA definition for the specified virtual register + MachineInstr *getSsaDef(Register RegNum) const { + return ARC::getVRegDef(MRI, RegNum); + } + + // Find the SSA definition for the specified virtual register + MachineInstr *getSsaDef(const MachineOperand &opd) const { + return ARC::getVRegDef(MRI, opd); + } + + // Returns true if the virtual register has exactly 1 non-debug use + bool hasOneUse(Register RegNum) const { + if (!Register::isVirtualRegister(RegNum)) + return false; + if (!MRI.hasOneDef(RegNum)) + return false; + return MRI.hasOneNonDBGUse(RegNum); + } + + // Returns true if the virtual register has exactly 1 non-debug use + bool hasOneUse(const MachineOperand &opd) const { + return (opd.isReg()) ? hasOneUse(opd.getReg()) : false; + } + + // Return the SSA definition of a virtual register IFF it has + // exactly 1 non-debug use + MachineInstr *getSingleUseSsaDef(Register RegNum) const { + if (!Register::isVirtualRegister(RegNum)) + return nullptr; + if (!MRI.hasOneDef(RegNum)) + return nullptr; + if (!MRI.hasOneNonDBGUse(RegNum)) + return nullptr; + return MRI.getVRegDef(RegNum); + } + + // Return the SSA definition of a virtual register IFF it has + // exactly 1 non-debug use + MachineInstr *getSingleUseSsaDef(const MachineOperand &opd) const { + return (opd.isReg()) ? getSingleUseSsaDef(opd.getReg()) : nullptr; + } + + // Return the unique use of a virtual register + MachineOperand *getUniqueSsaUse(Register RegNum) const { + if (!Register::isVirtualRegister(RegNum)) + return nullptr; + if (!MRI.hasOneNonDBGUse(RegNum)) + return nullptr; + return &(*MRI.use_nodbg_begin(RegNum)); + } + + // Return the unique use of a virtual register + MachineOperand *getUniqueSsaUse(const MachineOperand &opd) const { + return (opd.isReg()) ? getUniqueSsaUse(opd.getReg()) : nullptr; + } + + // Return true if the specified virtual register has NO uses + bool isUnusedVirtReg(const MachineOperand &opd) const { + return isUnusedVReg(MRI, opd); + } + + // Create a virtual register with the same register class as the + // specified virtual register + Register createVirtReg(Register virtReg) { + return ARC::createVirtReg(&MRI, virtReg); + } + + // Create a virtual register with the same register class as the + // specified virtual register + Register createVirtReg(const MachineOperand &virtRegOpd) { + return ARC::createVirtReg(&MRI, virtRegOpd); + } + + // Find the manifold constant integer value of the specified + // MachineOperand. Returns "true" if the value could be determined. 
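+  // Unlike the InstructionVisitor version, this follows SSA definitions, so
+  // a value materialized by MOV_ru6/MOV_rs12/MOV_rlimm (possibly through a
+  // COPY or a SEXB/SEXH) is still recognized as a constant.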
+ bool getImmed(const MachineOperand &, /*Output*/ int64_t &) const override; + +}; // end class ARC::SsaInstructionVisitor + +} // End namespace ARC + +} // End namespace llvm + +#endif // LLVM_LIB_TARGET_ARC_ARCUTIL_H Index: llvm/lib/Target/ARC/ARCUtil.cpp =================================================================== --- /dev/null +++ llvm/lib/Target/ARC/ARCUtil.cpp @@ -0,0 +1,455 @@ +//===- ARCUtil.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file defines utility functions and classes used throughout +/// the ARC code generator. +//===----------------------------------------------------------------------===// + +#include "ARCUtil.h" +#include "ARC.h" +#include "ARCSubtarget.h" +#include "ARCUtil.h" +#include "MCTargetDesc/ARCMCUtil.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/MachineDominators.h" + +using namespace llvm; + +// Determine if the function was compiled with -Os or with attribute +// minsize +bool ARC::isOptimizeForSpace(const MachineFunction &MF) { + return MF.getFunction().hasFnAttribute(Attribute::OptimizeForSize); +} + +// Create a new virtual register that has the same register class as "virtReg" +Register ARC::createVirtReg(MachineRegisterInfo *MRI, Register virtReg) { + assert(Register::isVirtualRegister(virtReg)); + const TargetRegisterClass *RC = MRI->getRegClass(virtReg); + return MRI->createVirtualRegister(RC); +} + +// Create a new virtual register that has the same register class as +// "virtRegOpd" +Register ARC::createVirtReg(MachineRegisterInfo *MRI, + const MachineOperand &virtRegOpd) { + assert(virtRegOpd.isReg()); + return createVirtReg(MRI, virtRegOpd.getReg()); +} + +// Create the appropriate ADD MachineInstr for the given register +// classes and specified constant +MachineInstr *ARC::buildAddByConstant(const MachineRegisterInfo &MRI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertPoint, + DebugLoc DL, Register DestReg, + Register SrcReg, int constant) { + unsigned addOp = 0; + bool is32bit = true; + // TODO + // if (Register::isVirtualRegister(DestReg)) { + // if (MRI.getRegClass(DestReg) == &ARC::WideCoreRegClass || + // MRI.getRegClass(DestReg) == &ARC::WideCore8RegClass) + // is32bit = false; + // } else if (ARC::isReg64(DestReg)) { + // is32bit = false; + // } + if (constant > 0) { + // TODO + // if (constant <= 0x3F) addOp = is32bit ? ARC::ADD_rru6 : ARC::ADDL_rru6; + if (constant <= 0x3F) + addOp = is32bit ? ARC::ADD_rru6 : ARC::ADD_rru6; + // TODO + // else if ( (constant & 0x7E) == constant) addOp = is32bit ? + // ARC::ADD1SCALED_rru6 : ARC::ADD1LSCALED_rru6; else if ( (constant & 0xFC) + // == constant) addOp = is32bit ? ARC::ADD2SCALED_rru6 : + // ARC::ADD2LSCALED_rru6; else if ( (constant & 0x1F8) == constant) addOp = + // is32bit ? ARC::ADD3SCALED_rru6 : ARC::ADD3LSCALED_rru6; + // TODO + // else if (constant < 2048) addOp = is32bit ? ARC::ADD_rrs12 : + // ARC::ADDL_rrs12; else addOp = is32bit ? ARC::ADD_rrlimm : + // ARC::ADDL_rrlimm; + else if (constant < 2048) + addOp = is32bit ? ARC::ADD_rrs12 : ARC::ADD_rrs12; + else + addOp = is32bit ? 
ARC::ADD_rrlimm : ARC::ADD_rrlimm; + } else if (constant == 0) { + return BuildMI( + MBB, InsertPoint, DL, + MBB.getParent()->getSubtarget().getInstrInfo()->get(ARC::COPY)) + .addReg(DestReg, RegState::Define) + .addReg(SrcReg, 0); + } else { + constant = -constant; + // TODO + // if (constant <= 0x3F) addOp = is32bit ? ARC::SUB_rru6 : ARC::SUBL_rru6; + if (constant <= 0x3F) + addOp = is32bit ? ARC::SUB_rru6 : ARC::SUB_rru6; + // TODO + // else if ( (constant & 0x7E) == constant) addOp = is32bit ? + // ARC::SUB1SCALED_rru6 : ARC::SUB1LSCALED_rru6; else if ( (constant & 0xFC) + // == constant) addOp = is32bit ? ARC::SUB2SCALED_rru6 : + // ARC::SUB2LSCALED_rru6; else if ( (constant & 0x1F8) == constant) addOp = + // is32bit ? ARC::SUB3SCALED_rru6 : ARC::SUB3LSCALED_rru6; else if (constant + // < 2048) {addOp = is32bit ? ARC::ADD_rrs12 : ARC::ADDL_rrs12; constant = + // -constant; } + // TODO + // else addOp = is32bit ? ARC::SUB_rrlimm : ARC::SUBL_rrlimm; + else + addOp = is32bit ? ARC::SUB_rrlimm : ARC::SUB_rrlimm; + } + return BuildMI(MBB, InsertPoint, DL, + MBB.getParent()->getSubtarget().getInstrInfo()->get(addOp)) + .addReg(DestReg, RegState::Define) + .addReg(SrcReg, 0) + .addImm(constant); +} + +// Determine if \p amount is in the specified range and has the +// appropriate alignment (mask) +static bool isAdjustedConstantInRange(const MachineOperand &opd, int64_t amount, + int64_t lo, int64_t hi, int64_t mask) { + if (opd.isImm()) { + int64_t k = opd.getImm() + amount; + return (mask & k) == 0 && k >= lo && k <= hi; + } + return false; +} + +// is "MI" a load or store that can handle "offset" as a displacement? +bool ARC::isLoadStoreThatCanHandleDisplacement(const MachineInstr &MI, + int64_t offset) { + int LogScale = 0; + // This function used to be isS9LoadStore, with no pre/post increment forms + // recognized. Setting AllowInc to false preserves that behaviour. + if (ARC::isS9LoadStoreOpcode(MI.getOpcode(), LogScale, /*AllowInc=*/false)) { + // NB: mayLoad rather than mayStore as the above includes PREFETCH ops + // without a dest register. + int ImmIndex = MI.mayLoad() ? 
2 : 1; + if (MI.getOperand(ImmIndex).isImm()) + return isAdjustedConstantInRange(MI.getOperand(ImmIndex), offset, + -(256 << LogScale), 255 << LogScale, + (1u << LogScale) - 1); + } + return false; +} + +// Determine if MI is an add instruction with a constant operand +bool ARC::isAddConstantOp(const MachineInstr &MI, int64_t *amount) { + switch (MI.getOpcode()) { +#undef CASE +#define CASE(OP, Sign, Shift) \ + case ARC::OP##_rrlimm: \ + case ARC::OP##_rrs12: \ + case ARC::OP##_rru6: \ + if (MI.getOperand(2).isImm()) { \ + if (amount) \ + *amount = Sign * MI.getOperand(2).getImm() << Shift; \ + return true; \ + } \ + break + CASE(ADD, 1, 0); + CASE(ADD1, 1, 1); + CASE(ADD2, 1, 2); + CASE(ADD3, 1, 3); + CASE(SUB, -1, 0); + CASE(SUB1, -1, 1); + CASE(SUB2, -1, 2); + CASE(SUB3, -1, 3); + } + return false; +} + +// Determine if \p MI dominates all uses of the specifed virtual +// register +bool ARC::dominatesAllUsesOf(MachineInstr *MI, Register VReg, + MachineDominatorTree *MDT, + MachineRegisterInfo *MRI) { + + assert(Register::isVirtualRegister(VReg) && "Expected virtual register!"); + + for (auto it = MRI->use_nodbg_begin(VReg), end = MRI->use_nodbg_end(); + it != end; ++it) { + MachineInstr *User = it->getParent(); + if (User->isPHI()) { + unsigned BBOperandIdx = User->getOperandNo(&*it) + 1; + MachineBasicBlock *MBB = User->getOperand(BBOperandIdx).getMBB(); + if (MBB->empty()) { + MachineBasicBlock *InstBB = MI->getParent(); + assert(InstBB != MBB && "Instruction found in empty MBB"); + if (!MDT->dominates(InstBB, MBB)) + return false; + continue; + } + User = &*MBB->rbegin(); + } + + if (!MDT->dominates(MI, User)) + return false; + } + return true; +} + +// Is "reg" read between MI (exclusive) and ME (exclusive)? +// +// Specifically, if this function returns false, then an instruction that +// defines "reg" at MI can be moved down to before ME, assuming all other +// constraints have been checked. +// +// Should work on virtual or physical registers. +bool ARC::isUsedBetween(Register reg, MachineBasicBlock::const_iterator MI, + MachineBasicBlock::const_iterator ME, + bool isExclusiveMI, bool stopIfModified) { + MachineBasicBlock::const_iterator end = MI->getParent()->end(); + if (MI == ME) + return false; + assert((end == ME || MI->getParent() == ME->getParent()) && + "Not in same block"); + + const TargetRegisterInfo *TRI = + MI->getParent()->getParent()->getSubtarget().getRegisterInfo(); + + // If we're testing an extension register or possibly XY register, then resort + // to more complex logic. +#if 0 // TODO upstream + if (Register::isPhysicalRegister(reg) && ARC::getRegToIndex(reg) >= 32){ + const ArcRegisterInfo *ATRI = static_cast(TRI); + if (ATRI->isXYReg(reg)) // Deal with XY... + return isXYUsedBetween(reg,MI,ME,ATRI,isExclusiveMI); + const ArcModuleInfo &MINFO = ArcModuleInfo::Find(MI->getParent()->getParent()->getFunction().getParent()); + const ArcModuleInfo::RegDesc *rd = MINFO.getRegister(reg); + if (rd != nullptr && rd->sideEffectOnRead) + return isExtensionRegUsedBetween(reg,*rd,MI,ME,ATRI,MINFO, isExclusiveMI); + } +#endif + + if (isExclusiveMI) + ++MI; + for (; MI != ME && MI != end; ++MI) { + if (MI->isDebugInstr()) + continue; + if (MI->readsRegister(reg, TRI)) + return true; + if (stopIfModified && MI->modifiesRegister(reg, TRI)) + break; + } + return false; +} + +// Replace all uses of "FromReg" to "ToReg". 
Does not modify defs +void ARC::replaceAllUsesWith(MachineRegisterInfo &MRI, Register FromReg, + Register ToReg) { + assert(FromReg != ToReg && "Cannot replace a reg with itself"); + // The logic of this loop was copied from + // MachineRegisterInfo::replaceRegWith(...) + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), + E = MRI.use_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + O.setReg(ToReg); + } +} + +// Handles calling visit() and the pre/post visit callouts as well +// as maintaining the "changed" member variable +bool ARC::InstructionVisitor::visitOneInstruction(MachineInstr &inst) { + preVisitCallout(inst); + bool instChanged = visit(inst); + changed |= instChanged; + postVisitCallout(inst, instChanged); + return instChanged; +} + +bool ARC::InstructionVisitor::iterateBlockForward(MachineBasicBlock &MBB) { + if (excludeBlock(MBB)) + return false; + quit = false; + preBlockCallout(MBB); + bool blockChanged = false; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E && !quit;) { + // NOTE: The callback/visitor is called after advancing the iterator, + // so it is allowed to change/delete the current instruction. + MachineBasicBlock::iterator J = I; + ++I; + bool instChanged = visitOneInstruction(*J); + blockChanged |= instChanged; + } + deletePending(); + postBlockCallout(MBB, blockChanged); + return blockChanged; +} + +bool ARC::InstructionVisitor::iterateBlockBackward(MachineBasicBlock &MBB) { + if (excludeBlock(MBB)) + return false; + quit = false; + preBlockCallout(MBB); + bool blockChanged = false; + // Ugliness to be sure reverse_iterator is moved off of + // the current instruction in case we delete it + MachineInstr *p[2] = {nullptr, nullptr}; + for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); + I != E && !quit; ++I) { + MachineInstr &Inst = *I; + MachineInstr *T = p[0]; + p[0] = p[1]; + p[1] = &Inst; + if (T) { + bool instChanged = visitOneInstruction(*T); + blockChanged |= instChanged; + } + } + for (unsigned i = 0; i < 2 && !quit; ++i) { + if (p[i]) { + bool instChanged = visitOneInstruction(*p[i]); + blockChanged |= instChanged; + } + } + deletePending(); + postBlockCallout(MBB, blockChanged); + return blockChanged; +} + +bool ARC::InstructionVisitor::iterateBlock(MachineBasicBlock &MBB, + IterationDirection direction, + unsigned maxRepetitions) { + if (direction == Forward) { + for (unsigned K = 0; K < maxRepetitions; ++K) { + if (!iterateBlockForward(MBB)) + break; + } + } else { + for (unsigned K = 0; K < maxRepetitions; ++K) { + if (!iterateBlockBackward(MBB)) + break; + } + } + return changed; +} + +bool ARC::InstructionVisitor::iterate(IterationDirection direction, + unsigned maxRepetitions) { + if (direction == Forward) { + bool changing = true; + for (unsigned K = 0; changing && K < maxRepetitions; ++K) { + changing = false; + ReversePostOrderTraversal RPOT(&MF); + for (ReversePostOrderTraversal::rpo_iterator + I = RPOT.begin(), + E = RPOT.end(); + I != E; ++I) { + auto *MBB = *I; + // for (auto& MBB : MF) { + // Update "changing" because we iterate "while changing" + // up to "maxRepititions". 
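+        // A complete forward sweep that changes nothing leaves "changing"
+        // false, ending the outer loop before maxRepetitions is reached.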
+ changing |= iterateBlockForward(*MBB); + // changed is handled in visitOneInstruction() + } + } + } else { + bool changing = true; + for (unsigned K = 0; changing && K < maxRepetitions; ++K) { + changing = false; + for (po_iterator I = po_begin(&MF), E = po_end(&MF); + I != E; ++I) { + MachineBasicBlock *MBB = *I; + // Update "changing" because we iterate "while changing" + // up to "maxRepititions". + changing |= iterateBlockBackward(*MBB); + // changed is handled in visitOneInstruction() + } + } + } + // Return if anything ever changed while iterating over the entire + // MachineFunction + return anyChanges(); +} + +ARC::InstructionVisitor::InstructionVisitor(const ARCSubtarget &st, + MachineFunction &mf) + : MF(mf), ST(st), TRI(ST.getRegisterInfo()), MRI(mf.getRegInfo()) {} + +bool ARC::InstructionVisitor::getImmed(const MachineOperand &opd, + int64_t &imm) const { + if (opd.isImm()) { + imm = opd.getImm(); + return true; + } + return false; +} + +bool ARC::SsaInstructionVisitor::getImmed(const MachineOperand &opd, + int64_t &imm) const { + if (InstructionVisitor::getImmed(opd, imm)) + return true; + return ARC::getImmed(MRI, opd, imm); +} + +// Find the constant value of the operand +bool ARC::getImmed(MachineRegisterInfo &MRI, const MachineOperand &opd, + int64_t &imm) { + if (opd.isImm()) { + imm = opd.getImm(); + return true; + } + MachineInstr *def = ARC::getVRegDef(MRI, opd); + if (def == nullptr) + return false; + switch (def->getOpcode()) { + case TargetOpcode::COPY: + // When it is a sub-register we may have to truncate it + if (def->getOperand(1).getSubReg()) + break; + return ARC::getImmed(MRI, def->getOperand(1), imm); + case ARC::MOV_ru6: + case ARC::MOV_rs12: + case ARC::MOV_rlimm: + if (def->getOperand(1).isImm()) { + imm = def->getOperand(1).getImm(); + return true; + } + break; +#undef CASE +#define CASE(OP, size) \ + case ARC::OP##_rr: \ + if (getImmed(MRI, def->getOperand(1), imm)) { \ + imm <<= 64 - size; \ + imm >>= 64 - size; \ + return true; \ + } \ + break + CASE(SEXB, 8); + CASE(SEXH, 16); + } // end switch + return false; +} + +void ARC::InstructionVisitor::toBeDeleted(MachineInstr *inst, + bool clearOperands) { + // Change the descrptor so current iteration does not trip some other + // optimization on a deleted instruction + if (inst && inst->getOpcode() != ARC::TBD) { + inst->setDesc(ST.getInstrInfo()->get(ARC::TBD)); + if (clearOperands) { + // Remove all operands so that we don't see reg references. + while (inst->getNumOperands() > 0) + inst->RemoveOperand(inst->getNumOperands() - 1); + } + tbd.push_back(inst); + } +} + +void ARC::InstructionVisitor::deletePending() { + for (auto *MI : tbd) { + MI->eraseFromParent(); + changed = true; + } + tbd.clear(); +} Index: llvm/lib/Target/ARC/CMakeLists.txt =================================================================== --- llvm/lib/Target/ARC/CMakeLists.txt +++ llvm/lib/Target/ARC/CMakeLists.txt @@ -26,6 +26,7 @@ ARCRegisterInfo.cpp ARCSubtarget.cpp ARCTargetMachine.cpp + ARCUtil.cpp LINK_COMPONENTS Analysis Index: llvm/lib/Target/ARC/MCTargetDesc/ARCMCUtil.h =================================================================== --- /dev/null +++ llvm/lib/Target/ARC/MCTargetDesc/ARCMCUtil.h @@ -0,0 +1,62 @@ +//===- ARCMCUtil.h +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file defines utility functions and classes used throughout
+/// the ARC MC and code generator layers.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARC_MCTARGETDESC_ARCMCUTIL_H
+#define LLVM_LIB_TARGET_ARC_MCTARGETDESC_ARCMCUTIL_H
+
+#include "ARC.h"
+
+namespace llvm {
+#if 0
+} fix emacs;
+#endif
+
+namespace ARC {
+#if 0
+} fix emacs;
+#endif
+
+// Get load/store RS9 form
+unsigned getConversionToRS9(unsigned);
+
+// Get load/store AW form (pre-increment)
+unsigned getConversionToAW(unsigned);
+
+// Get load/store AB form (post-increment)
+unsigned getConversionToAB(unsigned);
+
+// Get load/store RRAW form (indexed pre-increment)
+unsigned getConversionToRRAW(unsigned);
+
+// Get load/store RRAB form (indexed post-increment)
+unsigned getConversionToRRAB(unsigned);
+
+// Get load/store from indexed to RRAB form (indexed post-increment)
+unsigned getConversionFromRRToRRAB(unsigned);
+
+// Get load/store from scaled indexed to RRAB form (indexed post-increment)
+unsigned getConversionFromScaledToRRAB(unsigned);
+
+// This is adapted from the previous implementation of
+// isLoadStoreThatCanHandleDisplacement. That implementation excluded pre/post
+// indexed forms, presumably to avoid modifying those forms. Instead, we take a
+// parameter that controls whether those forms are allowed.
+// TODO: the no-di cases have irs9 forms based on the current TD files, but are
+// omitted here. Is this right?
+bool isS9LoadStoreOpcode(unsigned Opc, int &LogScale, bool AllowInc);
+
+} // End namespace ARC
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARC_MCTARGETDESC_ARCMCUTIL_H
Index: llvm/lib/Target/ARC/MCTargetDesc/ARCMCUtil.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/ARC/MCTargetDesc/ARCMCUtil.cpp
@@ -0,0 +1,205 @@
+//===- ARCMCUtil.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file defines utility functions and classes used throughout
+/// the ARC MC and code generator layers.
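+///
+/// For example, getConversionToAB(ARC::LD_rs9) yields ARC::LD_AB_rs9, the
+/// post-increment form that writes the incremented address back to the base
+/// register after the access; opcodes with no handled variant map to 0.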
+//===----------------------------------------------------------------------===//
+
+#include "ARCMCUtil.h"
+#include "../ARC.h"
+
+using namespace llvm;
+
+// Get load/store RS9 form
+unsigned ARC::getConversionToRS9(unsigned OP) {
+  // TODO upstream indexed addressing mode
+  return 0;
+}
+
+// Get load/store AW form (pre-increment)
+unsigned ARC::getConversionToAW(unsigned OP) {
+  // TODO upstream ARC64 and FPU opcodes
+  // TODO upstream signed and unsigned limm offsets
+  switch (OP) {
+#undef CASE
+#define CASE(OP) \
+  case ARC::OP##_rs9: \
+    return ARC::OP##_AW_rs9; \
+  case ARC::OP##_DI_rs9: \
+    return ARC::OP##_DI_AW_rs9
+  // CASE(LDU);
+  CASE(LDB_X);
+  // CASE(WIDE_LDSB);
+  CASE(LDH_X);
+  // CASE(WIDE_LDSH);
+  CASE(LD);
+  // CASE(LDD);
+  CASE(LDB);
+  CASE(LDH);
+  // TODO add immediate W6 forms
+  // CASE(STD);
+  CASE(ST);
+  CASE(STH);
+  CASE(STB);
+  // TODO upstream prefetch & VDSP
+  }
+  return 0;
+}
+
+// Get load/store AB form (post-increment)
+unsigned ARC::getConversionToAB(unsigned OP) {
+  // TODO upstream ARC64 and FPU opcodes
+  // TODO upstream signed and unsigned limm offsets
+  switch (OP) {
+#undef CASE
+#define CASE(OP) \
+  case ARC::OP##_rs9: \
+    return ARC::OP##_AB_rs9; \
+  case ARC::OP##_DI_rs9: \
+    return ARC::OP##_DI_AB_rs9
+  CASE(LD);
+  CASE(LDB_X);
+  CASE(LDB);
+  CASE(LDH_X);
+  CASE(LDH);
+  // TODO upstream prefetch & VDSP
+  }
+  return 0;
+}
+
+// TODO/FIXME Support all forms of indexed addressing
+
+// Get load/store RRAW form (indexed pre-increment)
+unsigned ARC::getConversionToRRAW(unsigned OP) { return 0; }
+
+// Get load/store RRAB form (indexed post-increment)
+unsigned ARC::getConversionToRRAB(unsigned OP) { return 0; }
+
+// Get load/store from indexed to RRAB form (indexed post-increment)
+unsigned ARC::getConversionFromRRToRRAB(unsigned OP) { return 0; }
+
+// Get load/store from scaled indexed to RRAB form (indexed post-increment)
+unsigned ARC::getConversionFromScaledToRRAB(unsigned OP) { return 0; }
+
+// This is adapted from the previous implementation of
+// isLoadStoreThatCanHandleDisplacement. That implementation excluded pre/post
+// indexed forms, presumably to avoid modifying those forms. Instead, we take a
+// parameter that controls whether those forms are allowed.
+// TODO: the no-di cases have irs9 forms based on the current TD files, but are
+// omitted here. Is this right?
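+// On a match, LogScale is set to the log2 of the access width encoded in the
+// tables below (0 for byte, 1 for half-word, 2 for word accesses); callers
+// presumably use it when forming scaled-index offsets.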
+bool ARC::isS9LoadStoreOpcode(unsigned OP, int &LogScale, bool AllowInc) {
+  switch (OP) {
+#undef CASE
+#define CASE(LD, Shift) \
+  case ARC::LD##_rs9: \
+  case ARC::LD##_DI_rs9: \
+    LogScale = Shift; \
+    return true
+#undef CASE_NO_DI
+#define CASE_NO_DI(LD, Shift) \
+  case ARC::LD##_rs9: \
+    LogScale = Shift; \
+    return true
+  CASE(LDB_X, 0);
+  // TODO CASE(WIDE_LDSB);
+  CASE(LDB, 0);
+  // case ARC::LDSB_rs9_aq:
+  // case ARC::LDUB_rs9_aq:
+  CASE(LDH_X, 1);
+  // CASE(WIDE_LDSH):
+  CASE(LDH, 1);
+  // CASE_NO_DI(FLD16):
+  // case ARC::LDSH_rs9_aq:
+  // case ARC::LDUH_rs9_aq:
+  CASE(LD, 2);
+  // CASE(LDD):
+  // CASE_NO_DI(LDS):
+  // CASE(LDU):
+  // CASE_NO_DI(FLD32):
+  // CASE_NO_DI(FLD64):
+  // CASE_NO_DI(FLD128):
+  // case ARC::LD_rs9_aq:
+  // CASE_NO_DI(LDL):
+  // CASE_NO_DI(LDDL):
+  // case ARC::LDL_rs9_aq:
+  // LogScale = 3;
+  // return true;
+
+  // TODO upstream STORE W6 opcodes
+  CASE(ST, 2);
+  // CASE(STD):
+  // CASE_NO_DI(FST128):
+  // CASE_NO_DI(FST64):
+  // CASE_NO_DI(FST32):
+  // case ARC::PREFETCH_rs9:
+  // case ARC::PREFETCHW_rs9:
+  // case ARC::PREALLOC_rs9:
+  // case ARC::ST_rs9_rl:
+  CASE(STH, 1);
+  // CASE_NO_DI(FST16):
+  // case ARC::STH_rs9_rl:
+  CASE(STB, 0);
+  // TODO upstream limmrs9 addr mode
+  // case ARC::ST_limmrs9:
+  // case ARC::STD_limmrs9:
+  // case ARC::STH_limmrs9:
+  // case ARC::STB_limmrs9:
+  // case ARC::ST_DI_limmrs9:
+  // case ARC::STD_DI_limmrs9:
+  // case ARC::STH_DI_limmrs9:
+  // case ARC::STB_DI_limmrs9:
+  // case ARC::STB_rs9_rl:
+  }
+
+  // Only incrementing forms after this point!
+  if (!AllowInc)
+    return false;
+
+  switch (OP) {
+#undef CASE_NO_DI
+#define CASE_NO_DI(OP, Shift) \
+  case ARC::OP##_AB_rs9: \
+  case ARC::OP##_AW_rs9: \
+    LogScale = Shift; \
+    return true;
+#undef CASE
+#define CASE(OP, Shift) \
+  case ARC::OP##_AB_rs9: \
+  case ARC::OP##_AW_rs9: \
+  case ARC::OP##_DI_AB_rs9: \
+  case ARC::OP##_DI_AW_rs9: \
+    LogScale = Shift; \
+    return true
+  CASE(LDB_X, 0);
+  CASE(LDB, 0);
+  CASE(LDH_X, 1);
+  CASE(LDH, 1);
+  CASE(LD, 2);
+  // CASE(LDD):
+  // CASE_NO_DI(LDS):
+  // CASE(LDU):
+  // CASE_NO_DI(LDL):
+  // CASE_NO_DI(LDDL):
+
+  // TODO upstream W6 and LIMM/SLIMM forms
+  CASE(ST, 2);
+  // CASE(STD):
+  CASE(STH, 1);
+  CASE(STB, 0);
+  // CASE_NO_DI(STL):
+  // CASE_NO_DI(STDL):
+  // CASE_NO_DI(FST16):
+  // CASE_NO_DI(FST32):
+  // CASE_NO_DI(FST64):
+  // CASE_NO_DI(FLD16):
+  // CASE_NO_DI(FLD32):
+  // CASE_NO_DI(FLD64):
+  }
+  return false;
+}
Index: llvm/lib/Target/ARC/MCTargetDesc/CMakeLists.txt
===================================================================
--- llvm/lib/Target/ARC/MCTargetDesc/CMakeLists.txt
+++ llvm/lib/Target/ARC/MCTargetDesc/CMakeLists.txt
@@ -2,6 +2,7 @@
   ARCInstPrinter.cpp
   ARCMCTargetDesc.cpp
   ARCMCAsmInfo.cpp
+  ARCMCUtil.cpp
 
   LINK_COMPONENTS
   MC