diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
@@ -72,6 +72,7 @@
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include <cassert>
 #include <cstdint>
+#include <deque>
 #include <memory>
 
 namespace llvm {
@@ -513,6 +514,16 @@
   /// Current target configuration. Controls how the pass handles errors.
   const TargetPassConfig *TPC;
 
+  /// Basic blocks whose instructions still have to be assigned register banks.
+  /// runOnMachineFunction pops blocks from the front; prependBBToWorkQueue
+  /// pushes newly created blocks at the front.
+  std::deque<MachineBasicBlock *> Worklist;
+
+  /// Next instruction runOnMachineFunction will visit. It is a member so that
+  /// setNextInstruction can redirect the walk while a mapping is applied.
+  MachineBasicBlock::iterator MII;
+
+  /// End of the basic block that \c MII is currently walking.
+  MachineBasicBlock::iterator End;
+
   /// Assign the register bank of each operand of \p MI.
   /// \return True on success, false otherwise.
   bool assignInstr(MachineInstr &MI);
@@ -663,6 +674,16 @@
   ///           inst.getOperand(argument.getOperandNo()).setReg(Tmp)
   /// \endcode
   bool runOnMachineFunction(MachineFunction &MF) override;
+
+  /// Add a newly created basic block at the front of the worklist, ahead of
+  /// every block that is still pending.
+  void prependBBToWorkQueue(MachineBasicBlock *BB);
+
+  /// Set the next instruction that should be assigned register banks.
+  /// \p MII must point at an instruction, not at end(): its parent block is
+  /// found by dereferencing it, and that block becomes the block being walked
+  /// and the insertion block of the pass's MIRBuilder.
+  void setNextInstruction(MachineBasicBlock::iterator MII);
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -30,6 +30,7 @@
 class MachineInstr;
 class MachineRegisterInfo;
 class raw_ostream;
+class RegBankSelect;
 class RegisterBank;
 class TargetInstrInfo;
 class TargetRegisterClass;
@@ -565,7 +566,8 @@
   static void applyDefaultMapping(const OperandsMapper &OpdMapper);
 
   /// See ::applyMapping.
-  virtual void applyMappingImpl(const OperandsMapper &OpdMapper) const {
+  virtual void applyMappingImpl(RegBankSelect &RegBankSelectPass,
+                                const OperandsMapper &OpdMapper) const {
     llvm_unreachable("The target has to implement that part");
   }
 
@@ -712,14 +714,15 @@
   ///
   /// Therefore, getting the mapping and applying it should be kept in
   /// sync.
-  void applyMapping(const OperandsMapper &OpdMapper) const {
+  void applyMapping(RegBankSelect &RegBankSelectPass,
+                    const OperandsMapper &OpdMapper) const {
     // The only mapping we know how to handle is the default mapping.
     if (OpdMapper.getInstrMapping().getID() == DefaultMappingID)
       return applyDefaultMapping(OpdMapper);
     // For other mapping, the target needs to do the right thing.
     // If that means calling applyDefaultMapping, fine, but this
     // must be explicitly stated.
-    applyMappingImpl(OpdMapper);
+    applyMappingImpl(RegBankSelectPass, OpdMapper);
   }
 
   /// Get the size in bits of \p Reg.
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -615,7 +615,7 @@
 
   // Second, rewrite the instruction.
   LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
-  RBI->applyMapping(OpdMapper);
+  RBI->applyMapping(*this, OpdMapper);
 
   return true;
 }
@@ -695,12 +695,16 @@
   // Use a RPOT to make sure all registers are assigned before we choose
   // the best mapping of the current instruction.
   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
-  for (MachineBasicBlock *MBB : RPOT) {
+  Worklist.assign(RPOT.begin(), RPOT.end()); // Drops leftovers of a failed run.
+  while (!Worklist.empty()) {
+    MachineBasicBlock *MBB = Worklist.front();
+    Worklist.pop_front();
+
     // Set a sensible insertion point so that subsequent calls to
     // MIRBuilder.
MIRBuilder.setMBB(*MBB); - for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end(); - MII != End;) { + End = MBB->end(); + for (MII = MBB->begin(); MII != End;) { // MI might be invalidated by the assignment, so move the // iterator before hand. MachineInstr &MI = *MII++; @@ -728,18 +732,6 @@ "unable to map instruction", MI); return false; } - - // It's possible the mapping changed control flow, and moved the following - // instruction to a new block, so figure out the new parent. - if (MII != End) { - MachineBasicBlock *NextInstBB = MII->getParent(); - if (NextInstBB != MBB) { - LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n"); - MBB = NextInstBB; - MIRBuilder.setMBB(*MBB); - End = MBB->end(); - } - } } } @@ -1102,3 +1094,14 @@ } OS << LocalFreq << " * " << LocalCost << " + " << NonLocalCost; } + +void RegBankSelect::prependBBToWorkQueue(MachineBasicBlock *BB) { + Worklist.push_front(BB); +} + +void RegBankSelect::setNextInstruction(MachineBasicBlock::iterator MII) { + this->MII = MII; + MachineBasicBlock *NextInstBB = MII->getParent(); + End = NextInstBB->end(); + MIRBuilder.setMBB(*NextInstBB); +} diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -103,7 +103,8 @@ /// This class provides the information for the target register banks. class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo { /// See RegisterBankInfo::applyMapping. - void applyMappingImpl(const OperandsMapper &OpdMapper) const override; + void applyMappingImpl(RegBankSelect &RegBankSelectPass, + const OperandsMapper &OpdMapper) const override; /// Get an instruction mapping where all the operands map to /// the same register bank and have similar size. 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -383,7 +383,7 @@ } void AArch64RegisterBankInfo::applyMappingImpl( - const OperandsMapper &OpdMapper) const { + RegBankSelect &RegBankSelectPass, const OperandsMapper &OpdMapper) const { switch (OpdMapper.getMI().getOpcode()) { case TargetOpcode::G_OR: case TargetOpcode::G_BITCAST: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -53,18 +53,18 @@ MachineRegisterInfo &MRI, ArrayRef OpIndices) const; - bool executeInWaterfallLoop( - MachineIRBuilder &B, - iterator_range Range, - SmallSet &SGPROperandRegs, - MachineRegisterInfo &MRI) const; - - bool executeInWaterfallLoop(MachineIRBuilder &B, - MachineInstr &MI, + bool executeInWaterfallLoop(RegBankSelect &RegBankSelectPass, + MachineIRBuilder &B, + iterator_range Range, + SmallSet &SGPROperandRegs, + MachineRegisterInfo &MRI) const; + + bool executeInWaterfallLoop(RegBankSelect &RegBankSelectPass, + MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef OpIndices) const; - bool executeInWaterfallLoop(MachineInstr &MI, - MachineRegisterInfo &MRI, + bool executeInWaterfallLoop(RegBankSelect &RegBankSelectPass, + MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef OpIndices) const; void constrainOpWithReadfirstlane(MachineInstr &MI, MachineRegisterInfo &MRI, @@ -75,11 +75,11 @@ bool applyMappingLoad(MachineInstr &MI, const OperandsMapper &OpdMapper, MachineRegisterInfo &MRI) const; - bool - applyMappingImage(MachineInstr &MI, - const OperandsMapper &OpdMapper, - MachineRegisterInfo &MRI, int RSrcIdx) const; - bool applyMappingSBufferLoad(const OperandsMapper &OpdMapper) 
const; + bool applyMappingImage(RegBankSelect &RegBankSelectPass, MachineInstr &MI, + const OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RSrcIdx) const; + bool applyMappingSBufferLoad(RegBankSelect &RegBankSelectPass, + const OperandsMapper &OpdMapper) const; bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const; @@ -89,11 +89,13 @@ std::pair splitBufferOffsets(MachineIRBuilder &B, Register Offset) const; - MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr *selectStoreIntrinsic(RegBankSelect &RegBankSelectPass, + MachineIRBuilder &B, MachineInstr &MI) const; /// See RegisterBankInfo::applyMapping. - void applyMappingImpl(const OperandsMapper &OpdMapper) const override; + void applyMappingImpl(RegBankSelect &RegBankSelectPass, + const OperandsMapper &OpdMapper) const override; const ValueMapping *getValueMappingForPtr(const MachineRegisterInfo &MRI, Register Ptr) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -79,6 +79,7 @@ #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/IR/IntrinsicsAMDGPU.h" @@ -703,10 +704,9 @@ /// There is additional complexity to try for compare values to identify the /// unique values used. 
bool AMDGPURegisterBankInfo::executeInWaterfallLoop( - MachineIRBuilder &B, - iterator_range Range, - SmallSet &SGPROperandRegs, - MachineRegisterInfo &MRI) const { + RegBankSelect &RegBankSelectPass, MachineIRBuilder &B, + iterator_range Range, + SmallSet &SGPROperandRegs, MachineRegisterInfo &MRI) const { SmallVector ResultRegs; SmallVector InitResultRegs; SmallVector PhiRegs; @@ -771,6 +771,9 @@ MF->insert(MBBI, LoopBB); MF->insert(MBBI, RestoreExecBB); MF->insert(MBBI, RemainderBB); + RegBankSelectPass.prependBBToWorkQueue(RestoreExecBB); + RegBankSelectPass.prependBBToWorkQueue(RemainderBB); + RegBankSelectPass.prependBBToWorkQueue(LoopBB); LoopBB->addSuccessor(RestoreExecBB); LoopBB->addSuccessor(LoopBB); @@ -1034,6 +1037,8 @@ // instructions will be in the remainder. B.setInsertPt(*RemainderBB, RemainderBB->begin()); + RegBankSelectPass.setNextInstruction(NewBegin); + return true; } @@ -1057,8 +1062,8 @@ } bool AMDGPURegisterBankInfo::executeInWaterfallLoop( - MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI, - ArrayRef OpIndices) const { + RegBankSelect &RegBankSelectPass, MachineIRBuilder &B, MachineInstr &MI, + MachineRegisterInfo &MRI, ArrayRef OpIndices) const { // Use a set to avoid extra readfirstlanes in the case where multiple operands // are the same register. 
SmallSet SGPROperandRegs; @@ -1067,15 +1072,15 @@ return false; MachineBasicBlock::iterator I = MI.getIterator(); - return executeInWaterfallLoop(B, make_range(I, std::next(I)), - SGPROperandRegs, MRI); + return executeInWaterfallLoop( + RegBankSelectPass, B, make_range(I, std::next(I)), SGPROperandRegs, MRI); } bool AMDGPURegisterBankInfo::executeInWaterfallLoop( - MachineInstr &MI, MachineRegisterInfo &MRI, - ArrayRef OpIndices) const { + RegBankSelect &RegBankSelectPass, MachineInstr &MI, + MachineRegisterInfo &MRI, ArrayRef OpIndices) const { MachineIRBuilder B(MI); - return executeInWaterfallLoop(B, MI, MRI, OpIndices); + return executeInWaterfallLoop(RegBankSelectPass, B, MI, MRI, OpIndices); } // Legalize an operand that must be an SGPR by inserting a readfirstlane. @@ -1290,7 +1295,8 @@ } bool AMDGPURegisterBankInfo::applyMappingImage( - MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + RegBankSelect &RegBankSelectPass, MachineInstr &MI, + const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, MachineRegisterInfo &MRI, int RsrcIdx) const { const int NumDefs = MI.getNumExplicitDefs(); @@ -1312,7 +1318,7 @@ SGPRIndexes.push_back(I); } - executeInWaterfallLoop(MI, MRI, SGPRIndexes); + executeInWaterfallLoop(RegBankSelectPass, MI, MRI, SGPRIndexes); return true; } @@ -1414,7 +1420,7 @@ } bool AMDGPURegisterBankInfo::applyMappingSBufferLoad( - const OperandsMapper &OpdMapper) const { + RegBankSelect &RegBankSelectPass, const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); MachineRegisterInfo &MRI = OpdMapper.getMRI(); @@ -1514,8 +1520,9 @@ SmallSet OpsToWaterfall; OpsToWaterfall.insert(RSrc); - executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()), - OpsToWaterfall, MRI); + executeInWaterfallLoop(RegBankSelectPass, B, + make_range(Span.begin(), Span.end()), OpsToWaterfall, + MRI); } if (NumLoads != 1) { @@ -1788,12 +1795,12 @@ return (CachePolicy >> 3) & 1; } - MachineInstr * 
-AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B, +AMDGPURegisterBankInfo::selectStoreIntrinsic(RegBankSelect &RegBankSelectPass, + MachineIRBuilder &B, MachineInstr &MI) const { - MachineRegisterInfo &MRI = *B.getMRI(); - executeInWaterfallLoop(B, MI, MRI, {2, 4}); + MachineRegisterInfo &MRI = *B.getMRI(); + executeInWaterfallLoop(RegBankSelectPass, B, MI, MRI, {2, 4}); // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer. @@ -2134,7 +2141,7 @@ } void AMDGPURegisterBankInfo::applyMappingImpl( - const OperandsMapper &OpdMapper) const { + RegBankSelect &RegBankSelectPass, const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); unsigned Opc = MI.getOpcode(); MachineRegisterInfo &MRI = OpdMapper.getMRI(); @@ -2751,7 +2758,7 @@ if (DstRegs.empty()) { applyDefaultMapping(OpdMapper); - executeInWaterfallLoop(MI, MRI, { 2 }); + executeInWaterfallLoop(RegBankSelectPass, MI, MRI, {2}); if (NeedCopyToVGPR) { // We don't want a phi for this temporary reg. @@ -2809,8 +2816,9 @@ // waterfall loop logic. B.setInstr(*Span.begin()); MI.eraseFromParent(); - executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()), - OpsToWaterfall, MRI); + executeInWaterfallLoop(RegBankSelectPass, B, + make_range(Span.begin(), Span.end()), OpsToWaterfall, + MRI); if (NeedCopyToVGPR) { MachineBasicBlock *LoopBB = Extract1->getParent(); @@ -2875,7 +2883,7 @@ if (InsRegs.empty()) { - executeInWaterfallLoop(MI, MRI, { 3 }); + executeInWaterfallLoop(RegBankSelectPass, MI, MRI, {3}); // Re-insert the constant offset add inside the waterfall loop. if (ShouldMoveIndexIntoLoop) { @@ -2938,8 +2946,9 @@ // Figure out the point after the waterfall loop before mangling the control // flow. 
- executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()), - OpsToWaterfall, MRI); + executeInWaterfallLoop(RegBankSelectPass, B, + make_range(Span.begin(), Span.end()), OpsToWaterfall, + MRI); // The insertion point is now right after the original instruction. // @@ -2970,7 +2979,7 @@ case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT: case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: { applyDefaultMapping(OpdMapper); - executeInWaterfallLoop(MI, MRI, {1, 4}); + executeInWaterfallLoop(RegBankSelectPass, MI, MRI, {1, 4}); return; } case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP: @@ -2986,23 +2995,23 @@ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: { applyDefaultMapping(OpdMapper); - executeInWaterfallLoop(MI, MRI, {2, 5}); + executeInWaterfallLoop(RegBankSelectPass, MI, MRI, {2, 5}); return; } case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: { applyDefaultMapping(OpdMapper); - executeInWaterfallLoop(MI, MRI, {2, 5}); + executeInWaterfallLoop(RegBankSelectPass, MI, MRI, {2, 5}); return; } case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: { applyDefaultMapping(OpdMapper); - executeInWaterfallLoop(MI, MRI, {3, 6}); + executeInWaterfallLoop(RegBankSelectPass, MI, MRI, {3, 6}); return; } case AMDGPU::G_AMDGPU_S_BUFFER_LOAD: { - applyMappingSBufferLoad(OpdMapper); + applyMappingSBufferLoad(RegBankSelectPass, OpdMapper); return; } case AMDGPU::G_INTRINSIC: { @@ -3069,13 +3078,14 @@ // Non-images can have complications from operands that allow both SGPR // and VGPR. For now it's too complicated to figure out the final opcode // to derive the register bank from the MCInstrDesc. 
- applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg); + applyMappingImage(RegBankSelectPass, MI, OpdMapper, MRI, + RSrcIntrin->RsrcArg); return; } case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: { unsigned N = MI.getNumExplicitOperands() - 2; applyDefaultMapping(OpdMapper); - executeInWaterfallLoop(MI, MRI, { N }); + executeInWaterfallLoop(RegBankSelectPass, MI, MRI, {N}); return; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { @@ -3126,7 +3136,8 @@ // and VGPR. For now it's too complicated to figure out the final opcode // to derive the register bank from the MCInstrDesc. if (RSrcIntrin->IsImage) { - applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg); + applyMappingImage(RegBankSelectPass, MI, OpdMapper, MRI, + RSrcIntrin->RsrcArg); return; } } diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.h b/llvm/lib/Target/Mips/MipsRegisterBankInfo.h --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.h +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.h @@ -42,7 +42,8 @@ /// G_UNMERGE and erase instructions that became dead in the process. We /// manually assign bank to def operand of all new instructions that were /// created in the process since they will not end up in RegBankSelect loop. - void applyMappingImpl(const OperandsMapper &OpdMapper) const override; + void applyMappingImpl(RegBankSelect &RegBankSelectPass, + const OperandsMapper &OpdMapper) const override; /// RegBankSelect determined that s64 operand is better to be split into two /// s32 operands in gprb. 
Here we manually set register banks of def operands diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -726,7 +726,7 @@ } void MipsRegisterBankInfo::applyMappingImpl( - const OperandsMapper &OpdMapper) const { + RegBankSelect &RegBankSelectPass, const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); InstListTy NewInstrs; MachineFunction *MF = MI.getMF(); diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.h b/llvm/lib/Target/X86/X86RegisterBankInfo.h --- a/llvm/lib/Target/X86/X86RegisterBankInfo.h +++ b/llvm/lib/Target/X86/X86RegisterBankInfo.h @@ -71,7 +71,8 @@ getInstrAlternativeMappings(const MachineInstr &MI) const override; /// See RegisterBankInfo::applyMapping. - void applyMappingImpl(const OperandsMapper &OpdMapper) const override; + void applyMappingImpl(RegBankSelect &RegBankSelectPass, + const OperandsMapper &OpdMapper) const override; const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override; diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp --- a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp @@ -270,7 +270,7 @@ } void X86RegisterBankInfo::applyMappingImpl( - const OperandsMapper &OpdMapper) const { + RegBankSelect &RegBankSelectPass, const OperandsMapper &OpdMapper) const { return applyDefaultMapping(OpdMapper); }