Index: lib/Target/Mips/CMakeLists.txt =================================================================== --- lib/Target/Mips/CMakeLists.txt +++ lib/Target/Mips/CMakeLists.txt @@ -31,14 +31,13 @@ MipsConstantIslandPass.cpp MipsDelaySlotFiller.cpp MipsFastISel.cpp - MipsHazardSchedule.cpp MipsInstrInfo.cpp MipsInstructionSelector.cpp MipsISelDAGToDAG.cpp MipsISelLowering.cpp MipsFrameLowering.cpp MipsLegalizerInfo.cpp - MipsLongBranch.cpp + MipsBranchExpansion.cpp MipsMCInstLower.cpp MipsMachineFunction.cpp MipsModuleISelDAGToDAG.cpp Index: lib/Target/Mips/Mips.h =================================================================== --- lib/Target/Mips/Mips.h +++ lib/Target/Mips/Mips.h @@ -33,8 +33,7 @@ FunctionPass *createMipsModuleISelDagPass(); FunctionPass *createMipsOptimizePICCallPass(); FunctionPass *createMipsDelaySlotFillerPass(); - FunctionPass *createMipsHazardSchedule(); - FunctionPass *createMipsLongBranchPass(); + FunctionPass *createMipsBranchExpansion(); FunctionPass *createMipsConstantIslandPass(); FunctionPass *createMicroMipsSizeReductionPass(); Index: lib/Target/Mips/MipsBranchExpansion.cpp =================================================================== --- /dev/null +++ lib/Target/Mips/MipsBranchExpansion.cpp @@ -0,0 +1,784 @@ +//===----------------------- MipsBranchExpansion.cpp ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This pass do two things: +/// - it expands a branch or jump instruction into a long branch if its offset +/// is too large to fit into its immediate field, +/// - it inserts nops to prevent forbidden slot hazards. +/// +/// The reason why this pass combines these two tasks is that one of these two +/// tasks can break the result of the previous one. +/// +/// Example of that is a situation where at first, no branch should be expanded, +/// but after adding at least one nop somewhere in the code to prevent a +/// forbidden slot hazard, offset of some branches may go out of range. In that +/// case it is necessary to check again if there is some branch that needs +/// expansion. On the other hand, expanding some branch may cause a control +/// transfer instruction to appear in the forbidden slot, which is a hazard that +/// should be fixed. This pass alternates between this two tasks untill no +/// changes are made. Only then we can be sure that all branches are expanded +/// properly, and no hazard situations exist. +/// +/// Regarding branch expanding: +/// +/// When branch instruction like beqzc or bnezc has offset that is too large +/// to fit into its immediate field, it has to be expanded to another +/// instruction or series of instructions. +/// +/// FIXME: Fix pc-region jump instructions which cross 256MB segment boundaries. +/// TODO: Handle out of range bc, b (pseudo) instructions. +/// +/// Regarding compact branch hazard prevention: +/// +/// Hazards handled: forbidden slots for MIPSR6. +/// +/// A forbidden slot hazard occurs when a compact branch instruction is executed +/// and the adjacent instruction in memory is a control transfer instruction +/// such as a branch or jump, ERET, ERETNC, DERET, WAIT and PAUSE. +/// +/// For example: +/// +/// 0x8004 bnec a1,v0, +/// 0x8008 beqc a1,a2, +/// +/// In such cases, the processor is required to signal a Reserved Instruction +/// exception. +/// +/// Here, if the instruction at 0x8004 is executed, the processor will raise an +/// exception as there is a control transfer instruction at 0x8008. +/// +/// There are two sources of forbidden slot hazards: +/// +/// A) A previous pass has created a compact branch directly. +/// B) Transforming a delay slot branch into compact branch. This case can be +/// difficult to process as lookahead for hazards is insufficient, as +/// backwards delay slot fillling can also produce hazards in previously +/// processed instuctions. +/// +/// In future this pass can be extended (or new pass can be created) to handle +/// other pipeline hazards, such as various MIPS1 hazards, processor errata that +/// require instruction reorganization, etc. +/// +/// This pass has to run after the delay slot filler as that pass can introduce +/// pipeline hazards such as compact branch hazard, hence the existing hazard +/// recognizer is not suitable. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/MipsABIInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MCTargetDesc/MipsMCNaCl.h" +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "Mips.h" +#include "MipsInstrInfo.h" +#include "MipsMachineFunction.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "mips-branch-expansion" + +STATISTIC(NumInsertedNops, "Number of nops inserted"); +STATISTIC(LongBranches, "Number of long branches."); + +static cl::opt SkipLongBranch( + "skip-mips-long-branch", + cl::init(false), + cl::desc("MIPS: Skip long branch pass."), + cl::Hidden); + +static cl::opt ForceLongBranch( + "force-mips-long-branch", + cl::init(false), + cl::desc("MIPS: Expand all branches to long format."), + cl::Hidden); + +namespace { + +using Iter = MachineBasicBlock::iterator; +using ReverseIter = MachineBasicBlock::reverse_iterator; + +struct MBBInfo { + uint64_t Size = 0; + bool HasLongBranch = false; + MachineInstr *Br = nullptr; + MBBInfo() = default; +}; + +class MipsBranchExpansion : public MachineFunctionPass { +public: + MipsBranchExpansion() : MachineFunctionPass(ID), ABI(MipsABIInfo::Unknown()) {} + + StringRef getPassName() const override { return "Mips Long Branch and Hazard Pass"; } + + bool runOnMachineFunction(MachineFunction &F) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + static char ID; + + void splitMBB(MachineBasicBlock *MBB); + void initMBBInfo(); + int64_t computeOffset(const MachineInstr *Br); + void replaceBranch(MachineBasicBlock &MBB, Iter Br, const DebugLoc &DL, + MachineBasicBlock *MBBOpnd); + void expandToLongBranch(MBBInfo &Info); + bool handleForbiddenSlot(MachineFunction &MF); + bool handlePossibleLongBranch(MachineFunction &MF); + + MachineFunction *MFp; + SmallVector MBBInfos; + bool IsPIC; + MipsABIInfo ABI; + unsigned LongBranchSeqSize; + bool isGPDispEmitted; +}; + +} // end of anonymous namespace + +char MipsBranchExpansion::ID = 0; + +/// Returns a pass that clears pipeline hazards. +FunctionPass *llvm::createMipsBranchExpansion() { + return new MipsBranchExpansion(); +} + +// Find the next real instruction from the current position in current basic +// block. +static Iter getNextMachineInstrInBB(Iter Position) { + Iter I = Position, E = Position->getParent()->end(); + I = std::find_if_not(I, E, + [](const Iter &Insn) { return Insn->isTransient(); }); + + return I; +} + +// Find the next real instruction from the current position, looking through +// basic block boundaries. +static std::pair getNextMachineInstr(Iter Position, MachineBasicBlock * Parent) { + if (Position == Parent->end()) { + do { + MachineBasicBlock *Succ = Parent->getNextNode(); + if (Succ != nullptr && Parent->isSuccessor(Succ)) { + Position = Succ->begin(); + Parent = Succ; + } else { + return std::make_pair(Position, true); + } + } while (Parent->empty()); + } + + Iter Instr = getNextMachineInstrInBB(Position); + if (Instr == Parent->end()) { + return getNextMachineInstr(Instr, Parent); + } + return std::make_pair(Instr, false); +} + +/// Iterate over list of Br's operands and search for a MachineBasicBlock +/// operand. +static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) { + for (unsigned I = 0, E = Br.getDesc().getNumOperands(); I < E; ++I) { + const MachineOperand &MO = Br.getOperand(I); + + if (MO.isMBB()) + return MO.getMBB(); + } + + llvm_unreachable("This instruction does not have an MBB operand."); +} + +// Traverse the list of instructions backwards until a non-debug instruction is +// found or it reaches E. +static ReverseIter getNonDebugInstr(ReverseIter B, const ReverseIter &E) { + for (; B != E; ++B) + if (!B->isDebugValue()) + return B; + + return E; +} + +// Split MBB if it has two direct jumps/branches. +void MipsBranchExpansion::splitMBB(MachineBasicBlock *MBB) { + ReverseIter End = MBB->rend(); + ReverseIter LastBr = getNonDebugInstr(MBB->rbegin(), End); + + // Return if MBB has no branch instructions. + if ((LastBr == End) || + (!LastBr->isConditionalBranch() && !LastBr->isUnconditionalBranch())) + return; + + ReverseIter FirstBr = getNonDebugInstr(std::next(LastBr), End); + + // MBB has only one branch instruction if FirstBr is not a branch + // instruction. + if ((FirstBr == End) || + (!FirstBr->isConditionalBranch() && !FirstBr->isUnconditionalBranch())) + return; + + assert(!FirstBr->isIndirectBranch() && "Unexpected indirect branch found."); + + // Create a new MBB. Move instructions in MBB to the newly created MBB. + MachineBasicBlock *NewMBB = + MFp->CreateMachineBasicBlock(MBB->getBasicBlock()); + + // Insert NewMBB and fix control flow. + MachineBasicBlock *Tgt = getTargetMBB(*FirstBr); + NewMBB->transferSuccessors(MBB); + NewMBB->removeSuccessor(Tgt, true); + MBB->addSuccessor(NewMBB); + MBB->addSuccessor(Tgt); + MFp->insert(std::next(MachineFunction::iterator(MBB)), NewMBB); + + NewMBB->splice(NewMBB->end(), MBB, LastBr.getReverse(), MBB->end()); +} + +// Fill MBBInfos. +void MipsBranchExpansion::initMBBInfo() { + // Split the MBBs if they have two branches. Each basic block should have at + // most one branch after this loop is executed. + for (auto &MBB : *MFp) + splitMBB(&MBB); + + MFp->RenumberBlocks(); + MBBInfos.clear(); + MBBInfos.resize(MFp->size()); + + const MipsInstrInfo *TII = + static_cast(MFp->getSubtarget().getInstrInfo()); + for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) { + MachineBasicBlock *MBB = MFp->getBlockNumbered(I); + + // Compute size of MBB. + for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin(); + MI != MBB->instr_end(); ++MI) + MBBInfos[I].Size += TII->getInstSizeInBytes(*MI); + + // Search for MBB's branch instruction. + ReverseIter End = MBB->rend(); + ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End); + + if ((Br != End) && !Br->isIndirectBranch() && + (Br->isConditionalBranch() || (Br->isUnconditionalBranch() && IsPIC))) + MBBInfos[I].Br = &*Br; + } +} + +// Compute offset of branch in number of bytes. +int64_t MipsBranchExpansion::computeOffset(const MachineInstr *Br) { + int64_t Offset = 0; + int ThisMBB = Br->getParent()->getNumber(); + int TargetMBB = getTargetMBB(*Br)->getNumber(); + + // Compute offset of a forward branch. + if (ThisMBB < TargetMBB) { + for (int N = ThisMBB + 1; N < TargetMBB; ++N) + Offset += MBBInfos[N].Size; + + return Offset + 4; + } + + // Compute offset of a backward branch. + for (int N = ThisMBB; N >= TargetMBB; --N) + Offset += MBBInfos[N].Size; + + return -Offset + 4; +} + +// Replace Br with a branch which has the opposite condition code and a +// MachineBasicBlock operand MBBOpnd. +void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br, + const DebugLoc &DL, + MachineBasicBlock *MBBOpnd) { + const MipsInstrInfo *TII = static_cast( + MBB.getParent()->getSubtarget().getInstrInfo()); + unsigned NewOpc = TII->getOppositeBranchOpc(Br->getOpcode()); + const MCInstrDesc &NewDesc = TII->get(NewOpc); + + MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc); + + for (unsigned I = 0, E = Br->getDesc().getNumOperands(); I < E; ++I) { + MachineOperand &MO = Br->getOperand(I); + + if (!MO.isReg()) { + assert(MO.isMBB() && "MBB operand expected."); + break; + } + + MIB.addReg(MO.getReg()); + } + + MIB.addMBB(MBBOpnd); + + if (Br->hasDelaySlot()) { + // Bundle the instruction in the delay slot to the newly created branch + // and erase the original branch. + assert(Br->isBundledWithSucc()); + MachineBasicBlock::instr_iterator II = Br.getInstrIterator(); + MIBundleBuilder(&*MIB).append((++II)->removeFromBundle()); + } + Br->eraseFromParent(); +} + +// Expand branch instructions to long branches. +// TODO: This function has to be fixed for beqz16 and bnez16, because it +// currently assumes that all branches have 16-bit offsets, and will produce +// wrong code if branches whose allowed offsets are [-128, -126, ..., 126] +// are present. +void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) { + MachineBasicBlock::iterator Pos; + MachineBasicBlock *MBB = I.Br->getParent(), *TgtMBB = getTargetMBB(*I.Br); + DebugLoc DL = I.Br->getDebugLoc(); + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator FallThroughMBB = ++MachineFunction::iterator(MBB); + MachineBasicBlock *LongBrMBB = MFp->CreateMachineBasicBlock(BB); + const MipsSubtarget &Subtarget = + static_cast(MFp->getSubtarget()); + const MipsInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); + + MFp->insert(FallThroughMBB, LongBrMBB); + MBB->replaceSuccessor(TgtMBB, LongBrMBB); + + if (IsPIC) { + MachineBasicBlock *BalTgtMBB = MFp->CreateMachineBasicBlock(BB); + MFp->insert(FallThroughMBB, BalTgtMBB); + LongBrMBB->addSuccessor(BalTgtMBB); + BalTgtMBB->addSuccessor(TgtMBB); + + // We must select between the MIPS32r6/MIPS64r6 BALC (which is a normal + // instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is an + // pseudo-instruction wrapping BGEZAL). + const unsigned BalOp = + Subtarget.hasMips32r6() + ? Subtarget.inMicroMipsMode() ? Mips::BALC_MMR6 : Mips::BALC + : Mips::BAL_BR; + + if (!ABI.IsN64()) { + // Pre R6: + // $longbr: + // addiu $sp, $sp, -8 + // sw $ra, 0($sp) + // lui $at, %hi($tgt - $baltgt) + // bal $baltgt + // addiu $at, $at, %lo($tgt - $baltgt) + // $baltgt: + // addu $at, $ra, $at + // lw $ra, 0($sp) + // jr $at + // addiu $sp, $sp, 8 + // $fallthrough: + // + + // R6: + // $longbr: + // addiu $sp, $sp, -8 + // sw $ra, 0($sp) + // lui $at, %hi($tgt - $baltgt) + // addiu $at, $at, %lo($tgt - $baltgt) + // balc $baltgt + // $baltgt: + // addu $at, $ra, $at + // lw $ra, 0($sp) + // addiu $sp, $sp, 8 + // jic $at, 0 + // $fallthrough: + + Pos = LongBrMBB->begin(); + + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(-8); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA) + .addReg(Mips::SP).addImm(0); + + // LUi and ADDiu instructions create 32-bit offset of the target basic + // block from the target of BAL(C) instruction. We cannot use immediate + // value for this offset because it cannot be determined accurately when + // the program has inline assembly statements. We therefore use the + // relocation expressions %hi($tgt-$baltgt) and %lo($tgt-$baltgt) which + // are resolved during the fixup, so the values will always be correct. + // + // Since we cannot create %hi($tgt-$baltgt) and %lo($tgt-$baltgt) + // expressions at this point (it is possible only at the MC layer), + // we replace LUi and ADDiu with pseudo instructions + // LONG_BRANCH_LUi and LONG_BRANCH_ADDiu, and add both basic + // blocks as operands to these instructions. When lowering these pseudo + // instructions to LUi and ADDiu in the MC layer, we will create + // %hi($tgt-$baltgt) and %lo($tgt-$baltgt) expressions and add them as + // operands to lowered instructions. + + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi), Mips::AT) + .addMBB(TgtMBB).addMBB(BalTgtMBB); + + MachineInstrBuilder BalInstr = + BuildMI(*MFp, DL, TII->get(BalOp)).addMBB(BalTgtMBB); + MachineInstrBuilder ADDiuInstr = + BuildMI(*MFp, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT) + .addReg(Mips::AT) + .addMBB(TgtMBB) + .addMBB(BalTgtMBB); + if (Subtarget.hasMips32r6()) { + LongBrMBB->insert(Pos, ADDiuInstr); + LongBrMBB->insert(Pos, BalInstr); + } else { + LongBrMBB->insert(Pos, BalInstr); + LongBrMBB->insert(Pos, ADDiuInstr); + LongBrMBB->rbegin()->bundleWithPred(); + } + + Pos = BalTgtMBB->begin(); + + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDu), Mips::AT) + .addReg(Mips::RA).addReg(Mips::AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA) + .addReg(Mips::SP).addImm(0); + if (Subtarget.isTargetNaCl()) + // Bundle-align the target of indirect branch JR. + TgtMBB->setAlignment(MIPS_NACL_BUNDLE_ALIGN); + + // In NaCl, modifying the sp is not allowed in branch delay slot. + // For MIPS32R6, we can skip using a delay slot branch. + if (Subtarget.isTargetNaCl() || + (Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard())) + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(8); + + if (Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard()) { + const unsigned JICOp = + Subtarget.inMicroMipsMode() ? Mips::JIC_MMR6 : Mips::JIC; + BuildMI(*BalTgtMBB, Pos, DL, TII->get(JICOp)) + .addReg(Mips::AT) + .addImm(0); + + } else { + unsigned JROp = + Subtarget.useIndirectJumpsHazard() + ? (Subtarget.hasMips32r6() ? Mips::JR_HB_R6 : Mips::JR_HB) + : Mips::JR; + BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT); + + if (Subtarget.isTargetNaCl()) { + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP)); + } else + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP) + .addImm(8); + + BalTgtMBB->rbegin()->bundleWithPred(); + } + } else { + // Pre R6: + // $longbr: + // daddiu $sp, $sp, -16 + // sd $ra, 0($sp) + // daddiu $at, $zero, %hi($tgt - $baltgt) + // dsll $at, $at, 16 + // bal $baltgt + // daddiu $at, $at, %lo($tgt - $baltgt) + // $baltgt: + // daddu $at, $ra, $at + // ld $ra, 0($sp) + // jr64 $at + // daddiu $sp, $sp, 16 + // $fallthrough: + + // R6: + // $longbr: + // daddiu $sp, $sp, -16 + // sd $ra, 0($sp) + // daddiu $at, $zero, %hi($tgt - $baltgt) + // dsll $at, $at, 16 + // daddiu $at, $at, %lo($tgt - $baltgt) + // balc $baltgt + // $baltgt: + // daddu $at, $ra, $at + // ld $ra, 0($sp) + // daddiu $sp, $sp, 16 + // jic $at, 0 + // $fallthrough: + + // We assume the branch is within-function, and that offset is within + // +/- 2GB. High 32 bits will therefore always be zero. + + // Note that this will work even if the offset is negative, because + // of the +1 modification that's added in that case. For example, if the + // offset is -1MB (0xFFFFFFFFFFF00000), the computation for %higher is + // + // 0xFFFFFFFFFFF00000 + 0x80008000 = 0x000000007FF08000 + // + // and the bits [47:32] are zero. For %highest + // + // 0xFFFFFFFFFFF00000 + 0x800080008000 = 0x000080007FF08000 + // + // and the bits [63:48] are zero. + + Pos = LongBrMBB->begin(); + + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64).addImm(-16); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SD)).addReg(Mips::RA_64) + .addReg(Mips::SP_64).addImm(0); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu), + Mips::AT_64).addReg(Mips::ZERO_64) + .addMBB(TgtMBB, MipsII::MO_ABS_HI).addMBB(BalTgtMBB); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) + .addReg(Mips::AT_64).addImm(16); + + MachineInstrBuilder BalInstr = + BuildMI(*MFp, DL, TII->get(BalOp)).addMBB(BalTgtMBB); + MachineInstrBuilder DADDiuInstr = + BuildMI(*MFp, DL, TII->get(Mips::LONG_BRANCH_DADDiu), Mips::AT_64) + .addReg(Mips::AT_64) + .addMBB(TgtMBB, MipsII::MO_ABS_LO) + .addMBB(BalTgtMBB); + if (Subtarget.hasMips32r6()) { + LongBrMBB->insert(Pos, DADDiuInstr); + LongBrMBB->insert(Pos, BalInstr); + } else { + LongBrMBB->insert(Pos, BalInstr); + LongBrMBB->insert(Pos, DADDiuInstr); + LongBrMBB->rbegin()->bundleWithPred(); + } + + Pos = BalTgtMBB->begin(); + + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDu), Mips::AT_64) + .addReg(Mips::RA_64).addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64) + .addReg(Mips::SP_64).addImm(0); + + if (Subtarget.hasMips64r6() && !Subtarget.useIndirectJumpsHazard()) { + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64) + .addImm(16); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JIC64)) + .addReg(Mips::AT_64) + .addImm(0); + } else { + unsigned JROp = + Subtarget.useIndirectJumpsHazard() + ? (Subtarget.hasMips32r6() ? Mips::JR_HB64_R6 : Mips::JR_HB64) + : Mips::JR64; + BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64) + .addImm(16); + BalTgtMBB->rbegin()->bundleWithPred(); + } + } + + assert(LongBrMBB->size() + BalTgtMBB->size() == LongBranchSeqSize); + } else { + // Pre R6: R6: + // $longbr: $longbr: + // j $tgt bc $tgt + // nop $fallthrough + // $fallthrough: + // + Pos = LongBrMBB->begin(); + LongBrMBB->addSuccessor(TgtMBB); + if (Subtarget.hasMips32r6()) + BuildMI(*LongBrMBB, Pos, DL, + TII->get(Subtarget.inMicroMipsMode() ? Mips::BC_MMR6 : Mips::BC)) + .addMBB(TgtMBB); + else + MIBundleBuilder(*LongBrMBB, Pos) + .append(BuildMI(*MFp, DL, TII->get(Mips::J)).addMBB(TgtMBB)) + .append(BuildMI(*MFp, DL, TII->get(Mips::NOP))); + + assert(LongBrMBB->size() == LongBranchSeqSize); + } + + if (I.Br->isUnconditionalBranch()) { + // Change branch destination. + assert(I.Br->getDesc().getNumOperands() == 1); + I.Br->RemoveOperand(0); + I.Br->addOperand(MachineOperand::CreateMBB(LongBrMBB)); + } else + // Change branch destination and reverse condition. + replaceBranch(*MBB, I.Br, DL, &*FallThroughMBB); +} + +static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) { + MachineBasicBlock &MBB = F.front(); + MachineBasicBlock::iterator I = MBB.begin(); + DebugLoc DL = MBB.findDebugLoc(MBB.begin()); + BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::V0) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII->get(Mips::ADDiu), Mips::V0) + .addReg(Mips::V0).addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + MBB.removeLiveIn(Mips::V0); +} + + +bool MipsBranchExpansion::handleForbiddenSlot(MachineFunction &MF) { + + const MipsSubtarget *STI = + &static_cast(MF.getSubtarget()); + + // Forbidden slot hazards are only defined for MIPSR6 but not microMIPSR6. + if (!STI->hasMips32r6() || STI->inMicroMipsMode()) + return false; + + const MipsInstrInfo *TII = STI->getInstrInfo(); + + bool Changed = false; + + for (MachineFunction::iterator FI = MF.begin(); FI != MF.end(); ++FI) { + for (Iter I = FI->begin(); I != FI->end(); ++I) { + + // Forbidden slot hazard handling. Use lookahead over state. + if (!TII->HasForbiddenSlot(*I)) + continue; + + Iter Inst; + bool LastInstInFunction = + std::next(I) == FI->end() && std::next(FI) == MF.end(); + if (!LastInstInFunction) { + std::pair Res = getNextMachineInstr(std::next(I), &*FI); + LastInstInFunction |= Res.second; + Inst = Res.first; + } + + if (LastInstInFunction || !TII->SafeInForbiddenSlot(*Inst)) { + + MachineBasicBlock::instr_iterator Iit = I->getIterator(); + if (std::next(Iit) == FI->end() || std::next(Iit)->getOpcode() != Mips::NOP) { + Changed = true; + MIBundleBuilder(&*I).append(BuildMI(MF, I->getDebugLoc(), TII->get(Mips::NOP))); + NumInsertedNops++; + } + } + } + } + + return Changed; +} + +bool MipsBranchExpansion::handlePossibleLongBranch(MachineFunction &MF) { + const MipsSubtarget &STI = + static_cast(MF.getSubtarget()); + const MipsInstrInfo *TII = + static_cast(STI.getInstrInfo()); + + const TargetMachine& TM = MF.getTarget(); + IsPIC = TM.isPositionIndependent(); + ABI = static_cast(TM).getABI(); + + LongBranchSeqSize = IsPIC ? ((ABI.IsN64() || STI.isTargetNaCl()) ? 10 : 9) + : (STI.hasMips32r6() ? 1 : 2); + + if (STI.inMips16Mode() || !STI.enableLongBranchPass()) + return false; + + + if (IsPIC && static_cast(TM).getABI().IsO32() && + MF.getInfo()->globalBaseRegSet() && !isGPDispEmitted) + emitGPDisp(MF, TII); + isGPDispEmitted = true; + + if (SkipLongBranch) + return false; + + MFp = &MF; + initMBBInfo(); + + SmallVectorImpl::iterator I, E = MBBInfos.end(); + bool EverMadeChange = false, MadeChange = true; + + while (MadeChange) { + MadeChange = false; + + for (I = MBBInfos.begin(); I != E; ++I) { + // Skip if this MBB doesn't have a branch or the branch has already been + // converted to a long branch. + if (!I->Br || I->HasLongBranch) + continue; + + int ShVal = STI.inMicroMipsMode() ? 2 : 4; + int64_t Offset = computeOffset(I->Br) / ShVal; + + if (STI.isTargetNaCl()) { + // The offset calculation does not include sandboxing instructions + // that will be added later in the MC layer. Since at this point we + // don't know the exact amount of code that "sandboxing" will add, we + // conservatively estimate that code will not grow more than 100%. + Offset *= 2; + } + + // Check if offset fits into 16-bit immediate field of branches. + if (!ForceLongBranch && isInt<16>(Offset)) + continue; + + I->HasLongBranch = true; + I->Size += LongBranchSeqSize * 4; + ++LongBranches; + EverMadeChange = MadeChange = true; + } + } + + if (!EverMadeChange) + return false; + + // Do the expansion. + for (I = MBBInfos.begin(); I != E; ++I) + if (I->HasLongBranch) { + expandToLongBranch(*I); + } + + MF.RenumberBlocks(); + + return true; +} + +bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) { + isGPDispEmitted = false; + + // Run these two at least once + bool longBranchChanged = handlePossibleLongBranch(MF); + bool forbiddenSlotChanged = handleForbiddenSlot(MF); + + bool Changed = longBranchChanged || forbiddenSlotChanged; + + // Then run them alternatively while there are changes + while (forbiddenSlotChanged) { + longBranchChanged = handlePossibleLongBranch(MF); + if (!longBranchChanged) break; + forbiddenSlotChanged = handleForbiddenSlot(MF); + } + + return Changed; +} Index: lib/Target/Mips/MipsHazardSchedule.cpp =================================================================== --- lib/Target/Mips/MipsHazardSchedule.cpp +++ /dev/null @@ -1,163 +0,0 @@ -//===- MipsHazardSchedule.cpp - Workaround pipeline hazards ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This pass is used to workaround certain pipeline hazards. For now, this -/// covers compact branch hazards. In future this pass can be extended to other -/// pipeline hazards, such as various MIPS1 hazards, processor errata that -/// require instruction reorganization, etc. -/// -/// This pass has to run after the delay slot filler as that pass can introduce -/// pipeline hazards, hence the existing hazard recognizer is not suitable. -/// -/// Hazards handled: forbidden slots for MIPSR6. -/// -/// A forbidden slot hazard occurs when a compact branch instruction is executed -/// and the adjacent instruction in memory is a control transfer instruction -/// such as a branch or jump, ERET, ERETNC, DERET, WAIT and PAUSE. -/// -/// For example: -/// -/// 0x8004 bnec a1,v0, -/// 0x8008 beqc a1,a2, -/// -/// In such cases, the processor is required to signal a Reserved Instruction -/// exception. -/// -/// Here, if the instruction at 0x8004 is executed, the processor will raise an -/// exception as there is a control transfer instruction at 0x8008. -/// -/// There are two sources of forbidden slot hazards: -/// -/// A) A previous pass has created a compact branch directly. -/// B) Transforming a delay slot branch into compact branch. This case can be -/// difficult to process as lookahead for hazards is insufficient, as -/// backwards delay slot fillling can also produce hazards in previously -/// processed instuctions. -/// -//===----------------------------------------------------------------------===// - -#include "Mips.h" -#include "MipsInstrInfo.h" -#include "MipsSubtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include -#include -#include - -using namespace llvm; - -#define DEBUG_TYPE "mips-hazard-schedule" - -STATISTIC(NumInsertedNops, "Number of nops inserted"); - -namespace { - -using Iter = MachineBasicBlock::iterator; -using ReverseIter = MachineBasicBlock::reverse_iterator; - -class MipsHazardSchedule : public MachineFunctionPass { -public: - MipsHazardSchedule() : MachineFunctionPass(ID) {} - - StringRef getPassName() const override { return "Mips Hazard Schedule"; } - - bool runOnMachineFunction(MachineFunction &F) override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } - -private: - static char ID; -}; - -} // end of anonymous namespace - -char MipsHazardSchedule::ID = 0; - -/// Returns a pass that clears pipeline hazards. -FunctionPass *llvm::createMipsHazardSchedule() { - return new MipsHazardSchedule(); -} - -// Find the next real instruction from the current position in current basic -// block. -static Iter getNextMachineInstrInBB(Iter Position) { - Iter I = Position, E = Position->getParent()->end(); - I = std::find_if_not(I, E, - [](const Iter &Insn) { return Insn->isTransient(); }); - - return I; -} - -// Find the next real instruction from the current position, looking through -// basic block boundaries. -static std::pair getNextMachineInstr(Iter Position, MachineBasicBlock * Parent) { - if (Position == Parent->end()) { - do { - MachineBasicBlock *Succ = Parent->getNextNode(); - if (Succ != nullptr && Parent->isSuccessor(Succ)) { - Position = Succ->begin(); - Parent = Succ; - } else { - return std::make_pair(Position, true); - } - } while (Parent->empty()); - } - - Iter Instr = getNextMachineInstrInBB(Position); - if (Instr == Parent->end()) { - return getNextMachineInstr(Instr, Parent); - } - return std::make_pair(Instr, false); -} - -bool MipsHazardSchedule::runOnMachineFunction(MachineFunction &MF) { - - const MipsSubtarget *STI = - &static_cast(MF.getSubtarget()); - - // Forbidden slot hazards are only defined for MIPSR6 but not microMIPSR6. - if (!STI->hasMips32r6() || STI->inMicroMipsMode()) - return false; - - bool Changed = false; - const MipsInstrInfo *TII = STI->getInstrInfo(); - - for (MachineFunction::iterator FI = MF.begin(); FI != MF.end(); ++FI) { - for (Iter I = FI->begin(); I != FI->end(); ++I) { - - // Forbidden slot hazard handling. Use lookahead over state. - if (!TII->HasForbiddenSlot(*I)) - continue; - - Iter Inst; - bool LastInstInFunction = - std::next(I) == FI->end() && std::next(FI) == MF.end(); - if (!LastInstInFunction) { - std::pair Res = getNextMachineInstr(std::next(I), &*FI); - LastInstInFunction |= Res.second; - Inst = Res.first; - } - - if (LastInstInFunction || !TII->SafeInForbiddenSlot(*Inst)) { - Changed = true; - MIBundleBuilder(&*I) - .append(BuildMI(MF, I->getDebugLoc(), TII->get(Mips::NOP))); - NumInsertedNops++; - } - } - } - return Changed; -} Index: lib/Target/Mips/MipsTargetMachine.cpp =================================================================== --- lib/Target/Mips/MipsTargetMachine.cpp +++ lib/Target/Mips/MipsTargetMachine.cpp @@ -294,8 +294,16 @@ // forbidden slot/ hazards for MIPSR6 which the hazard schedule pass will // fix. Any new pass must come before the hazard schedule pass. addPass(createMipsDelaySlotFillerPass()); - addPass(createMipsLongBranchPass()); - addPass(createMipsHazardSchedule()); + + // MipsLongBranchPass and MipsHazardSchedule passes are joined to one + // pass because of mutual conflict. When MipsHazardSchedule insert 'nop's, + // it potentially breaks some jumps, so they have to be expanded to long + // branches. When some branch is expanded to long branch, it potentially + // creates a hazard situation, which should be fixed by adding nops. + // This pass now combine these two passes, and runs them alternately until + // one of them reports no changes were made. + addPass(createMipsBranchExpansion()); + addPass(createMipsConstantIslandPass()); } Index: test/CodeGen/Mips/branch-relaxation-with-hazard.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/branch-relaxation-with-hazard.ll @@ -0,0 +1,67 @@ +; RUN: llc -O0 -mcpu=mips32r6 -relocation-model=pic < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-PIC +; RUN: llc -O0 -mcpu=mips32r6 -relocation-model=static < %s -o - | FileCheck %s --check-prefixes=CHECK-STATIC + +target triple = "mips-img-linux-gnu" + +declare i32 @boo(...) +declare i32 @foo(...) + +define i32 @main(i32 signext %argc, i8** %argv) { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 4 + store i32 0, i32* %retval, align 4 + store i32 %argc, i32* %argc.addr, align 4 + store i8** %argv, i8*** %argv.addr, align 4 + %0 = load i32, i32* %argc.addr, align 4 + %cmp = icmp sgt i32 %0, 1 + br i1 %cmp, label %if.then, label %if.end4 + +if.then: + %1 = load i32, i32* %argc.addr, align 4 + %cmp1 = icmp sgt i32 %1, 3 + br i1 %cmp1, label %if.then2, label %if.end + +if.then2: + %call = call i32 bitcast (i32 (...)* @boo to i32 ()*)() + store i32 %call, i32* %retval, align 4 + br label %return + +if.end: + call void asm sideeffect ".space 130998", "~{$1}"() + %call3 = call i32 bitcast (i32 (...)* @foo to i32 ()*)() + store i32 %call3, i32* %retval, align 4 + br label %return + +if.end4: + store i32 0, i32* %retval, align 4 + br label %return + +return: + %2 = load i32, i32* %retval, align 4 + ret i32 %2 + +; CHECK: main: +; CHECK: # %bb.1: +; CHECK-PIC: addiu +; CHECK-PIC: sw +; CHECK-PIC: lui +; CHECK-PIC: addiu +; CHECK-PIC: balc +; CHECK-PIC: addu +; CHECK-PIC: lw +; CHECK-PIC: addiu +; CHECK-PIC: jrc +; CHECK-PIC: bc +; CHECK-PIC: bnezc +; CHECK-PIC: nop +; CHECK-PIC: bc + +; CHECK-STATIC: bc +; CHECK-STATIC: j +; CHECK-STATIC: bnezc +; CHECK-STATIC: nop +; CHECK-STATIC: j + +}