diff --git a/llvm/lib/Target/ARC/ARC.h b/llvm/lib/Target/ARC/ARC.h --- a/llvm/lib/Target/ARC/ARC.h +++ b/llvm/lib/Target/ARC/ARC.h @@ -25,6 +25,7 @@ FunctionPass *createARCISelDag(ARCTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createARCExpandPseudosPass(); +FunctionPass *createARCOptAddrMode(); FunctionPass *createARCBranchFinalizePass(); } // end namespace llvm diff --git a/llvm/lib/Target/ARC/ARCFrameLowering.cpp b/llvm/lib/Target/ARC/ARCFrameLowering.cpp --- a/llvm/lib/Target/ARC/ARCFrameLowering.cpp +++ b/llvm/lib/Target/ARC/ARCFrameLowering.cpp @@ -293,8 +293,8 @@ // Now, pop fp if necessary. if (hasFP(MF)) { BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::LD_AB_rs9)) - .addReg(ARC::SP, RegState::Define) .addReg(ARC::FP, RegState::Define) + .addReg(ARC::SP, RegState::Define) .addReg(ARC::SP) .addImm(4); } diff --git a/llvm/lib/Target/ARC/ARCInstrFormats.td b/llvm/lib/Target/ARC/ARCInstrFormats.td --- a/llvm/lib/Target/ARC/ARCInstrFormats.td +++ b/llvm/lib/Target/ARC/ARCInstrFormats.td @@ -55,6 +55,44 @@ let DecoderMethod = "DecodeGBR32ShortRegister"; } +// Helper classes for load/store instructions +class DataSizeMode mode, string instSfx, string asmSfx> { + bits<2> Value = mode; + string InstSuffix = instSfx; + string AsmSuffix = asmSfx; +} +class ExtMode { + bit Value = mode; + string InstSuffix = instSfx; + string AsmSuffix = asmSfx; +} + +class AddrMode mode, string instSfx, string asmSfx> { + bits<2> Value = mode; + string InstSuffix = instSfx; + string AsmSuffix = asmSfx; +} + +class CacheMode { + bit Value = mode; + string InstSuffix = instSfx; + string AsmSuffix = asmSfx; +} + +def ByteSM : DataSizeMode<0b01, "B", "b">; +def HalfSM : DataSizeMode<0b10, "H", "h">; +def WordSM : DataSizeMode<0b00, "", "">; + +def NoEM : ExtMode<0, "", "">; +def SignedEM : ExtMode<1, "_X", ".x">; + +def NoAM : AddrMode<0b00, "", "">; +def PreIncAM : AddrMode<0b01, "_AW", ".aw">; +def PostIncAM : AddrMode<0b10, "_AB", ".ab">; + +def NoCC : 
CacheMode<0b0, "", "">; +def UncachedCC : CacheMode<0b1, "_DI", ".di">; + class InstARC pattern> : Instruction, Encoding64 { @@ -64,6 +102,18 @@ let AsmString = asmstr; let Pattern = pattern; let Size = sz; + + // Load/Store instruction properties + DataSizeMode ZZ = WordSM; + ExtMode X = NoEM; + AddrMode AA = NoAM; + CacheMode DI = NoCC; + + // Field used for relation models + string BaseOpcode = ""; + + //TSFlags + let TSFlags{1-0} = AA.Value; } // ARC pseudo instructions format @@ -354,6 +404,8 @@ let Inst{8-7} = zz; let Inst{6} = x; let Inst{5-0} = A; + + let BaseOpcode = "ld_rs9"; } class F32_LD_ADDR aa, bit di, bits<2> zz, dag outs, dag ins, @@ -363,6 +415,8 @@ let B = addr{14-9}; let S9 = addr{8-0}; + + let BaseOpcode = "ld_rs9"; } @@ -387,6 +441,8 @@ let Inst{6} = x; let Inst{5-0} = A; let DecoderMethod = "DecodeLdLImmInstruction"; + + let BaseOpcode = "ld_limm"; } // Register + LImm load. The 32-bit immediate address is in Inst[63-32]. @@ -415,6 +471,8 @@ let Inst{11-6} = LImmReg; let Inst{5-0} = A; let DecoderMethod = "DecodeLdRLImmInstruction"; + + let BaseOpcode = "ld_rlimm"; } // Register + S9 Store. (B + S9) @@ -437,6 +495,8 @@ let Inst{4-3} = aa; let Inst{2-1} = zz; let Inst{0} = 0; + + let BaseOpcode = "st_rs9"; } class F32_ST_ADDR aa, bit di, bits<2> zz, dag outs, dag ins, @@ -446,6 +506,8 @@ let B = addr{14-9}; let S9 = addr{8-0}; + + let BaseOpcode = "st_rs9"; } // LImm Store. @@ -469,6 +531,8 @@ let Inst{2-1} = zz; let Inst{0} = 0; let DecoderMethod = "DecodeStLImmInstruction"; + + let BaseOpcode = "st_limm"; } // Compact Move/Load. 
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.h b/llvm/lib/Target/ARC/ARCInstrInfo.h
--- a/llvm/lib/Target/ARC/ARCInstrInfo.h
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.h
@@ -81,6 +81,15 @@
 
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+  bool isPostIncrement(const MachineInstr &MI) const override;
+
+  // ARC-specific
+  bool isPreIncrement(const MachineInstr &MI) const;
+
+  bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos,
+                                unsigned &OffsetPos) const override;
+
   // Emit code before MBBI to load immediate value into physical register Reg.
   // Returns an iterator to the new instruction.
   MachineBasicBlock::iterator loadImmediate(MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
--- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
@@ -27,6 +27,19 @@
 #include "ARCGenInstrInfo.inc"
 
 #define DEBUG_TYPE "arc-inst-info"
+
+enum AddrIncType {
+  NoAddInc = 0,
+  PreInc = 1,
+  PostInc = 2,
+  Scaled = 3
+};
+
+enum TSFlagsConstants {
+  TSF_AddrModeOff = 0,
+  TSF_AddrModeMask = 3
+};
+
 // Pin the vtable to this file.
 void ARCInstrInfo::anchor() {}
 
@@ -395,3 +408,42 @@
   }
   return MI.getDesc().getSize();
 }
+
+// Return true if \p MI encodes the post-increment (".ab") address
+// mode in the AA field of its TSFlags.
+bool ARCInstrInfo::isPostIncrement(const MachineInstr &MI) const {
+  const MCInstrDesc &MID = MI.getDesc();
+  const uint64_t F = MID.TSFlags;
+  return ((F >> TSF_AddrModeOff) & TSF_AddrModeMask) == PostInc;
+}
+
+// Return true if \p MI encodes the pre-increment (".aw") address
+// mode in the AA field of its TSFlags.
+bool ARCInstrInfo::isPreIncrement(const MachineInstr &MI) const {
+  const MCInstrDesc &MID = MI.getDesc();
+  const uint64_t F = MID.TSFlags;
+  return ((F >> TSF_AddrModeOff) & TSF_AddrModeMask) == PreInc;
+}
+
+// Report the operand indices of the base register and the immediate
+// offset of a recognized load/store. Pre/post-increment forms carry an
+// extra base-update def in front, shifting both positions by one.
+bool ARCInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI,
+                                            unsigned &BasePos,
+                                            unsigned &OffsetPos) const {
+  if (!MI.mayLoad() && !MI.mayStore())
+    return false;
+
+  BasePos = 1;
+  OffsetPos = 2;
+
+  if (isPostIncrement(MI) || isPreIncrement(MI)) {
+    BasePos++;
+    OffsetPos++;
+  }
+
+  if (!MI.getOperand(BasePos).isReg() || !MI.getOperand(OffsetPos).isImm())
+    return false;
+
+  return true;
+}
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.td b/llvm/lib/Target/ARC/ARCInstrInfo.td
--- a/llvm/lib/Target/ARC/ARCInstrInfo.td
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.td
@@ -787,50 +787,47 @@
 // Load/Store instructions.
 //----------------------------------------------------------------------------
+// Filter class for load/store mappings
+class ArcLdStRel;
+
 // Load instruction variants:
 // Control bits: x, aa, di, zz
 // x - sign extend.
 // aa - incrementing mode. (N/A for LIMM).
 // di - uncached.
 // zz - data size.
-multiclass ArcLdInst zz, string asmop> { - let mayLoad = 1 in { - def _rs9 : F32_LD_ADDR<0, 0b00, 0, zz, - (outs GPR32:$A), (ins MEMrs9:$addr), - !strconcat(asmop, "\t$A, [$addr]"), []>; - - def _limm : F32_LD_LIMM<0, 0, zz, - (outs GPR32:$A), (ins MEMii:$addr), - !strconcat(asmop, "\t$A, [$addr]"), []>; - - def _rlimm : F32_LD_RLIMM<0, 0b00, 0, zz, - (outs GPR32:$A), (ins MEMrlimm:$addr), - !strconcat(asmop, "\t$A, [$addr]"), []>; - - def _X_rs9 : F32_LD_ADDR<1, 0b00, 0, zz, - (outs GPR32:$A), (ins MEMrs9:$addr), - !strconcat(asmop, ".x\t$A, [$addr]"), []>; - - def _X_limm : F32_LD_LIMM<1, 0, zz, - (outs GPR32:$A), (ins MEMii:$addr), - !strconcat(asmop, ".x\t$A, [$addr]"), []>; - - def _X_rlimm : F32_LD_RLIMM<1, 0b00, 0, zz, - (outs GPR32:$A), (ins MEMrlimm:$addr), - !strconcat(asmop, ".x\t$A, [$addr]"), []>; - - def _AB_rs9 : F32_LD_RS9<0, 0b10, 0, zz, - (outs GPR32:$addrout, GPR32:$A), - (ins GPR32:$B, immS<9>:$S9), - !strconcat(asmop, ".ab\t$A, [$B,$S9]"), []> - { let Constraints = "$addrout = $B"; } +multiclass ArcLdInst { + let mayLoad = 1, ZZ = zz, X = x, DI = di in { + def _rs9: F32_LD_ADDR, ArcLdStRel; + + def _limm: F32_LD_LIMM, ArcLdStRel; + + def _rlimm: F32_LD_RLIMM, ArcLdStRel; + + foreach aa = [PreIncAM, PostIncAM] in { + def aa.InstSuffix#_rs9: F32_LD_RS9:$S9), + asmop#aa.AsmSuffix#"\t$A, [$B,$S9]", []>, ArcLdStRel + { let Constraints = "$addrout = $B"; let AA = aa; } + } + } +} + +foreach di = [NoCC, UncachedCC] in { + defm LD#di.InstSuffix : ArcLdInst; + foreach zz = [ByteSM, HalfSM] in { + foreach x = [NoEM, SignedEM] in { + defm LD#zz.InstSuffix#x.InstSuffix#di.InstSuffix : ArcLdInst; + } } } - -// Load instruction definitions. -defm LD : ArcLdInst<0b00, "ld">; -defm LDH : ArcLdInst<0b10, "ldh">; -defm LDB : ArcLdInst<0b01, "ldb">; // Load instruction patterns. // 32-bit loads. @@ -872,25 +869,32 @@ // aa - incrementing mode. (N/A for LIMM). // di - uncached. // zz - data size. 
-multiclass ArcStInst zz, string asmop> { - let mayStore = 1 in { - def _rs9 : F32_ST_ADDR<0b00, 0, zz, (outs), (ins GPR32:$C, MEMrs9:$addr), - !strconcat(asmop, "\t$C, [$addr]"), []>; - - def _limm : F32_ST_LIMM<0, zz, (outs), (ins GPR32:$C, MEMii:$addr), - !strconcat(asmop, "\t$C, [$addr]"), []>; - - def _AW_rs9 : F32_ST_RS9<0b01, 0, zz, (outs GPR32:$addrout), - (ins GPR32:$C, GPR32:$B, immS<9>:$S9), - !strconcat(asmop, ".aw\t$C, [$B,$S9]"), []> - { let Constraints = "$addrout = $B"; } +multiclass ArcStInst { + let mayStore = 1, ZZ = zz, DI = di in { + def _rs9: F32_ST_ADDR, ArcLdStRel; + + def _limm: F32_ST_LIMM, ArcLdStRel; + + + foreach aa = [PreIncAM, PostIncAM] in { + def aa.InstSuffix#_rs9: F32_ST_RS9:$S9), + asmop#aa.AsmSuffix#"\t$C, [$B,$S9]", []>, ArcLdStRel + { let Constraints = "$addrout = $B"; let AA = aa; } + } } } -// Store instruction definitions. -defm ST : ArcStInst<0b00, "st">; -defm STH : ArcStInst<0b10, "sth">; -defm STB : ArcStInst<0b01, "stb">; +foreach di = [NoCC, UncachedCC] in { + foreach zz = [ByteSM, HalfSM, WordSM] in { + defm ST#zz.InstSuffix#di.InstSuffix : ArcStInst; + } +} // Store instruction patterns. // 32-bit stores @@ -911,3 +915,11 @@ def : Pat<(truncstorei8 i32:$C, AddrModeImm:$addr), (STB_limm i32:$C, AddrModeImm:$addr)>; +def getPostIncOpcode : InstrMapping { + let FilterClass = "ArcLdStRel"; + let RowFields = [ "BaseOpcode", "ZZ", "DI", "X"]; + let ColFields = [ "AA" ]; + let KeyCol = [ "NoAM" ]; + let ValueCols = [["PostIncAM"]]; +} + diff --git a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp @@ -0,0 +1,507 @@ +//===- ARCOptAddrMode.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass folds LD/ST + ADD pairs into Pre/Post-increment form of
+/// load/store instructions.
+//===----------------------------------------------------------------------===//
+
+#include "ARC.h"
+#define GET_INSTRMAP_INFO
+#include "ARCInstrInfo.h"
+#include "ARCTargetMachine.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define OPTADDRMODE_DESC "ARC load/store address mode"
+#define OPTADDRMODE_NAME "arc-addr-mode"
+#define DEBUG_TYPE "arc-addr-mode"
+
+namespace llvm {
+FunctionPass *createARCOptAddrMode();
+void initializeARCOptAddrModePass(PassRegistry &);
+} // end namespace llvm
+
+namespace {
+class ARCOptAddrMode : public MachineFunctionPass {
+public:
+  static char ID;
+
+  ARCOptAddrMode() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return OPTADDRMODE_DESC; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  const ARCSubtarget *AST = nullptr;
+  const ARCInstrInfo *AII = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  MachineDominatorTree *MDT = nullptr;
+
+  // Tries to combine \p Ldst with increment of its base register to form
+  // single post-increment instruction.
+  MachineInstr *tryToCombine(MachineInstr &Ldst);
+
+  // Returns true if result of \p Add is not used before \p Ldst
+  bool noUseOfAddBeforeLoadOrStore(const MachineInstr *Add,
+                                   const MachineInstr *Ldst);
+
+  // Returns true if load/store instruction \p Ldst can be hoisted up to
+  // instruction \p To
+  bool canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
+
+  // Returns true if load/store instruction \p Ldst can be sunk down
+  // to instruction \p To
+  bool canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
+
+  // Check if instructions \p Ldst and \p Add can be moved to become adjacent.
+  // If they can, return the instruction which does not need to move.
+  // If \p Uses is not null, fill it with instructions after \p Ldst which use
+  // \p Ldst's base register
+  MachineInstr *canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add,
+                                    SmallVectorImpl<MachineInstr *> *Uses);
+
+  // Returns true if all instructions in \p Uses array can be adjusted
+  // to accommodate increment of register \p BaseReg by \p Incr
+  bool canFixPastUses(const ArrayRef<MachineInstr *> &Uses,
+                      MachineOperand &Incr, unsigned BaseReg);
+
+  // Update all instructions in \p Uses to accommodate increment
+  // of \p BaseReg by \p Offset
+  void fixPastUses(ArrayRef<MachineInstr *> Uses, unsigned BaseReg,
+                   int64_t Offset);
+
+  // Change instruction \p Ldst to postincrement form.
+ // \p NewBase is register to hold update base value + // \p NewOffset is instruction's new offset + void changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode, + unsigned NewBase, MachineOperand &NewOffset); + + bool processBasicBlock(MachineBasicBlock &MBB); +}; + +} // end anonymous namespace + +char ARCOptAddrMode::ID = 0; +INITIALIZE_PASS_BEGIN(ARCOptAddrMode, OPTADDRMODE_NAME, OPTADDRMODE_DESC, false, + false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(ARCOptAddrMode, OPTADDRMODE_NAME, OPTADDRMODE_DESC, false, + false) + +// Return true if \p Off can be used as immediate offset +// operand of load/store instruction (S9 literal) +static bool isValidLoadStoreOffset(int64_t Off) { return isInt<9>(Off); } + +// Return true if \p Off can be used as immediate operand of +// ADD/SUB instruction (U6 literal) +static bool isValidIncrementOffset(int64_t Off) { return isUInt<6>(Off); } + +static bool isAddConstantOp(const MachineInstr &MI, int64_t &Amount) { + int64_t Sign = 1; + switch (MI.getOpcode()) { + case ARC::SUB_rru6: + Sign = -1; + // LLVM_FALLTHROUGH + case ARC::ADD_rru6: + assert(MI.getOperand(2).isImm() && "Expected immediate operand"); + Amount = Sign * MI.getOperand(2).getImm(); + return true; + default: + return false; + } +} + +// Return true if \p MI dominates of uses of virtual register \p VReg +static bool dominatesAllUsesOf(const MachineInstr *MI, unsigned VReg, + MachineDominatorTree *MDT, + MachineRegisterInfo *MRI) { + + assert(TargetRegisterInfo::isVirtualRegister(VReg) && + "Expected virtual register!"); + + for (auto it = MRI->use_nodbg_begin(VReg), end = MRI->use_nodbg_end(); + it != end; ++it) { + MachineInstr *User = it->getParent(); + if (User->isPHI()) { + unsigned BBOperandIdx = User->getOperandNo(&*it) + 1; + MachineBasicBlock *MBB = User->getOperand(BBOperandIdx).getMBB(); + if (MBB->empty()) { + const MachineBasicBlock *InstBB = MI->getParent(); + assert(InstBB != MBB && "Instruction found in empty 
MBB"); + if (!MDT->dominates(InstBB, MBB)) + return false; + continue; + } + User = &*MBB->rbegin(); + } + + if (!MDT->dominates(MI, User)) + return false; + } + return true; +} + +// Return true if \p MI is load/store instruction with immediate offset +// which can be adjusted by \p Disp +static bool isLoadStoreThatCanHandleDisplacement(const TargetInstrInfo *TII, + const MachineInstr &MI, + int64_t Disp) { + unsigned BasePos, OffPos; + if (!TII->getBaseAndOffsetPosition(MI, BasePos, OffPos)) + return false; + const MachineOperand &MO = MI.getOperand(OffPos); + if (!MO.isImm()) + return false; + int64_t Offset = MO.getImm() + Disp; + return isValidLoadStoreOffset(Offset); +} + +bool ARCOptAddrMode::noUseOfAddBeforeLoadOrStore(const MachineInstr *Add, + const MachineInstr *Ldst) { + unsigned R = Add->getOperand(0).getReg(); + return dominatesAllUsesOf(Ldst, R, MDT, MRI); +} + +MachineInstr *ARCOptAddrMode::tryToCombine(MachineInstr &Ldst) { + assert((Ldst.mayLoad() || Ldst.mayStore()) && "LD/ST instruction expected"); + + unsigned BasePos, OffsetPos; + + LLVM_DEBUG(dbgs() << "[ABAW] tryToCombine " << Ldst); + if (!AII->getBaseAndOffsetPosition(Ldst, BasePos, OffsetPos)) { + LLVM_DEBUG(dbgs() << "[ABAW] Not a recognized load/store\n"); + return nullptr; + } + + MachineOperand &Base = Ldst.getOperand(BasePos); + MachineOperand &Offset = Ldst.getOperand(OffsetPos); + + assert(Base.isReg() && "Base operand must be register"); + if (!Offset.isImm()) { + LLVM_DEBUG(dbgs() << "[ABAW] Offset is not immediate\n"); + return nullptr; + } + + unsigned B = Base.getReg(); + if (TargetRegisterInfo::isStackSlot(B) || + !TargetRegisterInfo::isVirtualRegister(B)) { + LLVM_DEBUG(dbgs() << "[ABAW] Base is not VReg\n"); + return nullptr; + } + + // TODO: try to generate address preincrement + if (Offset.getImm() != 0) { + LLVM_DEBUG(dbgs() << "[ABAW] Non-zero offset\n"); + return nullptr; + } + + for (auto &Add : MRI->use_nodbg_instructions(B)) { + int64_t Incr; + if 
(!isAddConstantOp(Add, Incr)) + continue; + if (!isValidLoadStoreOffset(Incr)) + continue; + + SmallVector Uses; + MachineInstr *MoveTo = canJoinInstructions(&Ldst, &Add, &Uses); + + if (!MoveTo) + continue; + + if (!canFixPastUses(Uses, Add.getOperand(2), B)) + continue; + + LLVM_DEBUG(MachineInstr *First = &Ldst; MachineInstr *Last = &Add; + if (MDT->dominates(Last, First)) std::swap(First, Last); + dbgs() << "[ABAW] Instructions " << *First << " and " << *Last + << " combined\n"; + + ); + + MachineInstr *Result = Ldst.getNextNode(); + if (MoveTo == &Add) { + Ldst.removeFromParent(); + Add.getParent()->insertAfter(Add.getIterator(), &Ldst); + } + if (Result == &Add) + Result = Result->getNextNode(); + + fixPastUses(Uses, B, Incr); + + int NewOpcode = ARC::getPostIncOpcode(Ldst.getOpcode()); + assert(NewOpcode > 0 && "No postincrement form found"); + unsigned NewBaseReg = Add.getOperand(0).getReg(); + changeToAddrMode(Ldst, NewOpcode, NewBaseReg, Add.getOperand(2)); + Add.eraseFromParent(); + + return Result; + } + return nullptr; +} + +MachineInstr * +ARCOptAddrMode::canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add, + SmallVectorImpl *Uses) { + assert(Ldst && Add && "NULL instruction passed"); + + MachineInstr *First = Add; + MachineInstr *Last = Ldst; + if (MDT->dominates(Ldst, Add)) + std::swap(First, Last); + else if (!MDT->dominates(Add, Ldst)) + return nullptr; + + LLVM_DEBUG(dbgs() << "canJoinInstructions: " << *First << *Last); + + unsigned BasePos, OffPos; + + if (!AII->getBaseAndOffsetPosition(*Ldst, BasePos, OffPos)) { + LLVM_DEBUG( + dbgs() + << "[canJoinInstructions] Cannot determine base/offset position\n"); + return nullptr; + } + + unsigned BaseReg = Ldst->getOperand(BasePos).getReg(); + + // prohibit this: + // v1 = add v0, c + // st v1, [v0, 0] + // and this + // st v0, [v0, 0] + // v1 = add v0, c + if (Ldst->mayStore() && Ldst->getOperand(0).isReg()) { + unsigned StReg = Ldst->getOperand(0).getReg(); + if (Add->getOperand(0).getReg() == 
StReg || BaseReg == StReg) { + LLVM_DEBUG(dbgs() << "[canJoinInstructions] Store uses result of Add\n"); + return nullptr; + } + } + + SmallVector UsesAfterLdst; + SmallVector UsesAfterAdd; + for (MachineInstr &MI : MRI->use_nodbg_instructions(BaseReg)) { + if (&MI == Ldst || &MI == Add) + continue; + if (&MI != Add && MDT->dominates(Ldst, &MI)) + UsesAfterLdst.push_back(&MI); + else if (!MDT->dominates(&MI, Ldst)) + return nullptr; + if (MDT->dominates(Add, &MI)) + UsesAfterAdd.push_back(&MI); + } + + MachineInstr *Result = nullptr; + + if (First == Add) { + // n = add b, i + // ... + // x = ld [b, o] or x = ld [n, o] + + if (noUseOfAddBeforeLoadOrStore(First, Last)) { + Result = Last; + LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can sink Add down to Ldst\n"); + } else if (canHoistLoadStoreTo(Ldst, Add)) { + Result = First; + LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can hoist Ldst to Add\n"); + } + } else { + // x = ld [b, o] + // ... + // n = add b, i + Result = First; + LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can hoist Add to Ldst\n"); + } + if (Result && Uses) + *Uses = (Result == Ldst) ? 
UsesAfterLdst : UsesAfterAdd; + return Result; +} + +bool ARCOptAddrMode::canFixPastUses(const ArrayRef &Uses, + MachineOperand &Incr, unsigned BaseReg) { + + assert(Incr.isImm() && "Expected immediate increment"); + int64_t NewOffset = Incr.getImm(); + for (MachineInstr *MI : Uses) { + int64_t Dummy; + if (isAddConstantOp(*MI, Dummy)) { + if (isValidIncrementOffset(Dummy + NewOffset)) + continue; + return false; + } + if (isLoadStoreThatCanHandleDisplacement(AII, *MI, -NewOffset)) + continue; + LLVM_DEBUG(dbgs() << "Instruction cannot handle displacement " << -NewOffset + << ": " << *MI); + return false; + } + return true; +} + +void ARCOptAddrMode::fixPastUses(ArrayRef Uses, + unsigned NewBase, int64_t NewOffset) { + + for (MachineInstr *MI : Uses) { + int64_t Amount; + unsigned BasePos, OffPos; + if (isAddConstantOp(*MI, Amount)) { + NewOffset += Amount; + assert(isValidIncrementOffset(NewOffset) && + "New offset won't fit into ADD instr"); + BasePos = 1; + OffPos = 2; + } else if (AII->getBaseAndOffsetPosition(*MI, BasePos, OffPos)) { + MachineOperand &MO = MI->getOperand(OffPos); + assert(MO.isImm() && "expected immediate operand"); + NewOffset += MO.getImm(); + assert(isValidLoadStoreOffset(NewOffset) && + "New offset won't fit into LD/ST"); + } else + llvm_unreachable("unexpected instruction"); + + MI->getOperand(BasePos).setReg(NewBase); + MI->getOperand(OffPos).setImm(NewOffset); + } +} + +bool ARCOptAddrMode::canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) { + if (Ldst->getParent() != To->getParent()) + return false; + MachineBasicBlock::const_iterator MI(To), ME(Ldst), + End(Ldst->getParent()->end()); + + bool IsStore = Ldst->mayStore(); + for (; MI != ME && MI != End; ++MI) { + if (MI->isDebugValue()) + continue; + if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() || + MI->hasUnmodeledSideEffects()) + return false; + if (IsStore && MI->mayLoad()) + return false; + } + + for (auto &O : Ldst->explicit_operands()) { + if (!O.isReg() || 
!O.isUse()) + continue; + MachineInstr *OpDef = MRI->getVRegDef(O.getReg()); + if (!OpDef || !MDT->dominates(OpDef, To)) + return false; + } + return true; +} + +bool ARCOptAddrMode::canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) { + // Can only sink load/store within same BB + if (Ldst->getParent() != To->getParent()) + return false; + MachineBasicBlock::const_iterator MI(Ldst), ME(To), + End(Ldst->getParent()->end()); + + bool IsStore = Ldst->mayStore(); + bool IsLoad = Ldst->mayLoad(); + + unsigned ValReg = IsLoad ? Ldst->getOperand(0).getReg() : 0; + for (; MI != ME && MI != End; ++MI) { + if (MI->isDebugValue()) + continue; + if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() || + MI->hasUnmodeledSideEffects()) + return false; + if (IsStore && MI->mayLoad()) + return false; + if (ValReg && MI->readsVirtualRegister(ValReg)) + return false; + } + return true; +} + +void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode, + unsigned NewBase, + MachineOperand &NewOffset) { + bool IsStore = Ldst.mayStore(); + unsigned BasePos, OffPos; + MachineOperand Src = MachineOperand::CreateImm(0xDEADBEEF); + AII->getBaseAndOffsetPosition(Ldst, BasePos, OffPos); + + unsigned BaseReg = Ldst.getOperand(BasePos).getReg(); + + Ldst.RemoveOperand(OffPos); + Ldst.RemoveOperand(BasePos); + + if (IsStore) { + Src = Ldst.getOperand(BasePos - 1); + Ldst.RemoveOperand(BasePos - 1); + } + + Ldst.setDesc(AST->getInstrInfo()->get(NewOpcode)); + Ldst.addOperand(MachineOperand::CreateReg(NewBase, true)); + if (IsStore) + Ldst.addOperand(Src); + Ldst.addOperand(MachineOperand::CreateReg(BaseReg, false)); + Ldst.addOperand(NewOffset); + LLVM_DEBUG(dbgs() << "[ABAW] New Ldst: " << Ldst); +} + +bool ARCOptAddrMode::processBasicBlock(MachineBasicBlock &MBB) { + bool Changed = false; + for (auto MI = MBB.begin(), ME = MBB.end(); MI != ME; ++MI) { + if (MI->isDebugValue()) + continue; + if (!MI->mayLoad() && !MI->mayStore()) + continue; + if 
(ARC::getPostIncOpcode(MI->getOpcode()) < 0) + continue; + MachineInstr *Res = tryToCombine(*MI); + if (Res) { + Changed = true; + // Res points to the next instruction. Rewind to process it + MI = std::prev(Res->getIterator()); + } + } + return Changed; +} + +bool ARCOptAddrMode::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + AST = &MF.getSubtarget(); + AII = AST->getInstrInfo(); + MRI = &MF.getRegInfo(); + MDT = &getAnalysis(); + + bool Changed = false; + for (auto &MBB : MF) + Changed |= processBasicBlock(MBB); + return Changed; +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createARCOptAddrMode() { return new ARCOptAddrMode(); } diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/llvm/lib/Target/ARC/ARCTargetMachine.cpp --- a/llvm/lib/Target/ARC/ARCTargetMachine.cpp +++ b/llvm/lib/Target/ARC/ARCTargetMachine.cpp @@ -74,7 +74,10 @@ void ARCPassConfig::addPreEmitPass() { addPass(createARCBranchFinalizePass()); } -void ARCPassConfig::addPreRegAlloc() { addPass(createARCExpandPseudosPass()); } +void ARCPassConfig::addPreRegAlloc() { + addPass(createARCExpandPseudosPass()); + addPass(createARCOptAddrMode()); +} // Force static initialization. 
extern "C" void LLVMInitializeARCTarget() { diff --git a/llvm/lib/Target/ARC/CMakeLists.txt b/llvm/lib/Target/ARC/CMakeLists.txt --- a/llvm/lib/Target/ARC/CMakeLists.txt +++ b/llvm/lib/Target/ARC/CMakeLists.txt @@ -20,6 +20,7 @@ ARCISelLowering.cpp ARCMachineFunctionInfo.cpp ARCMCInstLower.cpp + ARCOptAddrMode.cpp ARCRegisterInfo.cpp ARCSubtarget.cpp ARCTargetMachine.cpp diff --git a/llvm/test/CodeGen/ARC/addrmode.ll b/llvm/test/CodeGen/ARC/addrmode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARC/addrmode.ll @@ -0,0 +1,68 @@ +; RUN: llc -march=arc < %s | FileCheck %s + +; CHECK-LABEL: copy +; CHECK-NOT: add +define void @copy(i8* inreg nocapture %p, i8* inreg nocapture readonly %q) { +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %p.addr.0 = phi i8* [ %p, %entry ], [ %incdec.ptr1, %while.cond ] + %q.addr.0 = phi i8* [ %q, %entry ], [ %incdec.ptr, %while.cond ] + %incdec.ptr = getelementptr inbounds i8, i8* %q.addr.0, i32 1 + %0 = load i8, i8* %q.addr.0, align 1 + %incdec.ptr1 = getelementptr inbounds i8, i8* %p.addr.0, i32 1 + store i8 %0, i8* %p.addr.0, align 1 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret void +} + + +%struct._llist = type { %struct._llist*, %struct._llist*, i32 } + +; CHECK-LABEL: neg1 +; CHECK-NOT: std.ab +define void @neg1(i8* inreg nocapture %a, i8* inreg nocapture readonly %b, i32 inreg %n) { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.07 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %b, i32 %i.07 + %0 = load i8, i8* %arrayidx, align 1 + %mul = mul nuw nsw i32 %i.07, 257 + %arrayidx1 = getelementptr inbounds i8, i8* %a, i32 %mul + store i8 %0, i8* %arrayidx1, align 1 + %inc = add nuw nsw i32 %i.07, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label 
%for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: neg2 +; CHECK-NOT: st.ab +define void @neg2(%struct._llist* inreg %a, i32 inreg %n) { +entry: + %cmp13 = icmp sgt i32 %n, 0 + br i1 %cmp13, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.014 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds %struct._llist, %struct._llist* %a, i32 %i.014 + %next = getelementptr inbounds %struct._llist, %struct._llist* %arrayidx, i32 0, i32 0 + store %struct._llist* %arrayidx, %struct._llist** %next, align 4 + %prev = getelementptr inbounds %struct._llist, %struct._llist* %a, i32 %i.014, i32 1 + store %struct._llist* %arrayidx, %struct._llist** %prev, align 4 + %inc = add nuw nsw i32 %i.014, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/MC/Disassembler/ARC/ldst.txt b/llvm/test/MC/Disassembler/ARC/ldst.txt --- a/llvm/test/MC/Disassembler/ARC/ldst.txt +++ b/llvm/test/MC/Disassembler/ARC/ldst.txt @@ -45,3 +45,50 @@ # CHECK: st %r7, [63920] 0x00 0x1e 0xc0 0x71 0x00 0x00 0xb0 0xf9 +# CHECK: ldb.ab %r1, [%r0,1] +0x01 0x10 0x81 0x04 + +# CHECK: stb.ab %r2, [%r0,1] +0x01 0x18 0x92 0x00 + +# CHECK: ldh.ab %r3, [%r0,12] +0x0C 0x10 0x03 0x05 + +# CHECK: sth.ab %r4, [%r0,18] +0x12 0x18 0x14 0x01 + +# CHECK: ld.ab %r5, [%r2,128] +0x80 0x12 0x05 0x04 + +# CHECK: st.ab %r6, [%r2,64] +0x40 0x1A 0x90 0x01 + +# CHECK: ldb.aw %r7, [%r0,1] +0x01 0x10 0x87 0x02 + +# CHECK: stb.aw %r8, [%r0,1] +0x01 0x18 0x0A 0x02 + +# CHECK: ldh.aw %r3, [%r0,12] +0x0C 0x10 0x03 0x03 + +# CHECK: sth.aw %r3, [%r0,18] +0x12 0x18 0xCC 0x00 + +# CHECK: ld.aw %r6, [%r2,128] +0x80 0x12 0x06 0x02 + +# CHECK: st.aw %r6, [%r2,64] +0x40 0x1A 0x88 0x01 + +# CHECK: ld.aw %r6, [%r2,128] +0x80 0x12 0x06 0x02 + +# CHECK: st.aw %r6, [%r2,64] +0x40 0x1A 0x88 0x01 + +# CHECK: ldb.x.di.aw %r0, [%r8,8] +0x08 0x10 0xC0 0x1A + +# CHECK: stb.di.ab %r0, [%r9,64] +0x40 0x19 0x32 0x10