Index: llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -365,6 +365,8 @@
   }
 
   void stopObservingChanges() { State.Observer = nullptr; }
+
+  bool isObservingChanges() const { return State.Observer != nullptr; }
   /// @}
 
   /// Set the debug location to \p DL for all the next build instructions.
Index: llvm/include/llvm/CodeGen/RegisterBankInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/RegisterBankInfo.h
+++ llvm/include/llvm/CodeGen/RegisterBankInfo.h
@@ -29,6 +29,7 @@
 namespace llvm {
 
 class MachineInstr;
+class MachineIRBuilder;
 class MachineRegisterInfo;
 class raw_ostream;
 class TargetInstrInfo;
@@ -571,8 +572,9 @@
   static void applyDefaultMapping(const OperandsMapper &OpdMapper);
 
   /// See ::applyMapping.
-  virtual void applyMappingImpl(const OperandsMapper &OpdMapper) const {
-    llvm_unreachable("The target has to implement that part");
+  virtual void applyMappingImpl(MachineIRBuilder &Builder,
+                                const OperandsMapper &OpdMapper) const {
+    llvm_unreachable("The target has to implement this");
   }
 
 public:
@@ -729,14 +731,15 @@
   ///
   /// Therefore, getting the mapping and applying it should be kept in
   /// sync.
-  void applyMapping(const OperandsMapper &OpdMapper) const {
+  void applyMapping(MachineIRBuilder &Builder,
+                    const OperandsMapper &OpdMapper) const {
     // The only mapping we know how to handle is the default mapping.
     if (OpdMapper.getInstrMapping().getID() == DefaultMappingID)
       return applyDefaultMapping(OpdMapper);
     // For other mapping, the target needs to do the right thing.
     // If that means calling applyDefaultMapping, fine, but this
     // must be explicitly stated.
-    applyMappingImpl(OpdMapper);
+    applyMappingImpl(Builder, OpdMapper);
   }
 
   /// Get the size in bits of \p Reg.
Index: llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -623,7 +623,7 @@
   // Second, rewrite the instruction.
   LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
-  RBI->applyMapping(OpdMapper);
+  RBI->applyMapping(MIRBuilder, OpdMapper);
   return true;
 }
Index: llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -103,7 +103,8 @@
 /// This class provides the information for the target register banks.
 class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
   /// See RegisterBankInfo::applyMapping.
-  void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+  void applyMappingImpl(MachineIRBuilder &Builder,
+                        const OperandsMapper &OpdMapper) const override;
 
   /// Get an instruction mapping where all the operands map to
   /// the same register bank and have similar size.
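Note for downstream targets: `applyMapping` now threads RegBankSelect's own `MachineIRBuilder` into the `applyMappingImpl` hook, so overrides should stop constructing local builders. Below is a minimal sketch of an updated override under the new signature; `MyTargetRegisterBankInfo` and its G_OR handling are hypothetical and not part of this patch:

```cpp
// Sketch only: a hypothetical out-of-tree override adapted to the new
// two-argument hook. The builder is owned by RegBankSelect; point it at the
// instruction being rewritten instead of creating a MachineIRBuilder locally.
void MyTargetRegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  Builder.setInstrAndDebugLoc(MI);

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR:
    // No custom rewriting needed; fall back to the default operand mapping,
    // mirroring the explicit-fallback contract documented on applyMapping().
    return applyDefaultMapping(OpdMapper);
  default:
    llvm_unreachable("unexpected custom mapping");
  }
}
```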
Index: llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -389,7 +389,7 @@
 }
 
 void AArch64RegisterBankInfo::applyMappingImpl(
-    const OperandsMapper &OpdMapper) const {
+    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
   switch (OpdMapper.getMI().getOpcode()) {
   case TargetOpcode::G_OR:
   case TargetOpcode::G_BITCAST:
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -53,43 +53,36 @@
                                 MachineRegisterInfo &MRI,
                                 ArrayRef<unsigned> OpIndices) const;
 
-  bool executeInWaterfallLoop(
-      MachineIRBuilder &B,
-      iterator_range<MachineBasicBlock::iterator> Range,
-      SmallSet<Register, 4> &SGPROperandRegs,
-      MachineRegisterInfo &MRI) const;
+  bool executeInWaterfallLoop(MachineIRBuilder &B,
+                              iterator_range<MachineBasicBlock::iterator> Range,
+                              SmallSet<Register, 4> &SGPROperandRegs) const;
 
   Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               Register Src) const;
 
-  bool executeInWaterfallLoop(MachineIRBuilder &B,
-                              MachineInstr &MI,
-                              MachineRegisterInfo &MRI,
-                              ArrayRef<unsigned> OpIndices) const;
-  bool executeInWaterfallLoop(MachineInstr &MI,
-                              MachineRegisterInfo &MRI,
+  bool executeInWaterfallLoop(MachineIRBuilder &B, MachineInstr &MI,
                               ArrayRef<unsigned> OpIndices) const;
 
-  void constrainOpWithReadfirstlane(MachineInstr &MI, MachineRegisterInfo &MRI,
+  void constrainOpWithReadfirstlane(MachineIRBuilder &B, MachineInstr &MI,
                                     unsigned OpIdx) const;
-  bool applyMappingDynStackAlloc(MachineInstr &MI,
+  bool applyMappingDynStackAlloc(MachineIRBuilder &B,
                                  const OperandsMapper &OpdMapper,
-                                 MachineRegisterInfo &MRI) const;
-  bool applyMappingLoad(MachineInstr &MI,
-                        const OperandsMapper &OpdMapper,
-                        MachineRegisterInfo &MRI) const;
-  bool
-  applyMappingImage(MachineInstr &MI,
-                    const OperandsMapper &OpdMapper,
-                    MachineRegisterInfo &MRI, int RSrcIdx) const;
+                                 MachineInstr &MI) const;
+  bool applyMappingLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper,
+                        MachineInstr &MI) const;
+  bool applyMappingImage(MachineIRBuilder &B, MachineInstr &MI,
+                         const OperandsMapper &OpdMapper, int RSrcIdx) const;
   unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset,
                             Register &VOffsetReg, Register &SOffsetReg,
                             int64_t &InstOffsetVal, Align Alignment) const;
 
-  bool applyMappingSBufferLoad(const OperandsMapper &OpdMapper) const;
+  bool applyMappingSBufferLoad(MachineIRBuilder &B,
+                               const OperandsMapper &OpdMapper) const;
 
-  bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const;
+  bool applyMappingBFE(MachineIRBuilder &B, const OperandsMapper &OpdMapper,
+                       bool Signed) const;
 
-  bool applyMappingMAD_64_32(const OperandsMapper &OpdMapper) const;
+  bool applyMappingMAD_64_32(MachineIRBuilder &B,
+                             const OperandsMapper &OpdMapper) const;
 
   Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                           Register Reg) const;
@@ -98,7 +91,8 @@
   splitBufferOffsets(MachineIRBuilder &B, Register Offset) const;
 
   /// See RegisterBankInfo::applyMapping.
-  void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+  void applyMappingImpl(MachineIRBuilder &Builder,
+                        const OperandsMapper &OpdMapper) const override;
 
   const ValueMapping *getValueMappingForPtr(const MachineRegisterInfo &MRI,
                                             Register Ptr) const;
@@ -186,12 +180,9 @@
   getInstrMapping(const MachineInstr &MI) const override;
 
 private:
-
-  bool foldExtractEltToCmpSelect(MachineInstr &MI,
-                                 MachineRegisterInfo &MRI,
+  bool foldExtractEltToCmpSelect(MachineIRBuilder &B, MachineInstr &MI,
                                  const OperandsMapper &OpdMapper) const;
-  bool foldInsertEltToCmpSelect(MachineInstr &MI,
-                                MachineRegisterInfo &MRI,
+  bool foldInsertEltToCmpSelect(MachineIRBuilder &B, MachineInstr &MI,
                                 const OperandsMapper &OpdMapper) const;
 };
 } // End llvm namespace.
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -97,19 +97,25 @@
 // Observer to apply a register bank to new registers created by LegalizerHelper.
 class ApplyRegBankMapping final : public GISelChangeObserver {
 private:
+  MachineIRBuilder &B;
   const AMDGPURegisterBankInfo &RBI;
   MachineRegisterInfo &MRI;
   const RegisterBank *NewBank;
   SmallVector<MachineInstr *, 4> NewInsts;
 
 public:
-  ApplyRegBankMapping(const AMDGPURegisterBankInfo &RBI_,
+  ApplyRegBankMapping(MachineIRBuilder &B, const AMDGPURegisterBankInfo &RBI_,
                       MachineRegisterInfo &MRI_, const RegisterBank *RB)
-      : RBI(RBI_), MRI(MRI_), NewBank(RB) {}
+      : B(B), RBI(RBI_), MRI(MRI_), NewBank(RB) {
+    assert(!B.isObservingChanges());
+    B.setChangeObserver(*this);
+  }
 
   ~ApplyRegBankMapping() {
     for (MachineInstr *MI : NewInsts)
       applyBank(*MI);
+
+    B.stopObservingChanges();
   }
 
   /// Set any registers that don't have a set register class or bank to SALU.
@@ -131,7 +137,8 @@
 
       // Replace the extension with a select, which really uses the boolean
       // source.
-      MachineIRBuilder B(MI);
+      B.setInsertPt(*MI.getParent(), MI);
+
       auto True = B.buildConstant(S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
       auto False = B.buildConstant(S32, 0);
       B.buildSelect(DstReg, SrcReg, True, False);
@@ -758,11 +765,8 @@
 /// There is additional complexity to try for compare values to identify the
 /// unique values used.
 bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
-    MachineIRBuilder &B,
-    iterator_range<MachineBasicBlock::iterator> Range,
-    SmallSet<Register, 4> &SGPROperandRegs,
-    MachineRegisterInfo &MRI) const {
-
+    MachineIRBuilder &B, iterator_range<MachineBasicBlock::iterator> Range,
+    SmallSet<Register, 4> &SGPROperandRegs) const {
   // Track use registers which have already been expanded with a readfirstlane
   // sequence. This may have multiple uses if moving a sequence.
   DenseMap<Register, Register> WaterfalledRegMap;
@@ -787,6 +791,7 @@
   const int OrigRangeSize = std::distance(Range.begin(), Range.end());
 #endif
 
+  MachineRegisterInfo &MRI = *B.getMRI();
   Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
   Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
 
@@ -986,37 +991,28 @@
 }
 
 bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
-    MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI,
-    ArrayRef<unsigned> OpIndices) const {
+    MachineIRBuilder &B, MachineInstr &MI, ArrayRef<unsigned> OpIndices) const {
   // Use a set to avoid extra readfirstlanes in the case where multiple operands
   // are the same register.
   SmallSet<Register, 4> SGPROperandRegs;
-  if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, OpIndices))
+  if (!collectWaterfallOperands(SGPROperandRegs, MI, *B.getMRI(), OpIndices))
     return false;
 
   MachineBasicBlock::iterator I = MI.getIterator();
   return executeInWaterfallLoop(B, make_range(I, std::next(I)),
-                                SGPROperandRegs, MRI);
-}
-
-bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
-    MachineInstr &MI, MachineRegisterInfo &MRI,
-    ArrayRef<unsigned> OpIndices) const {
-  MachineIRBuilder B(MI);
-  return executeInWaterfallLoop(B, MI, MRI, OpIndices);
+                                SGPROperandRegs);
 }
 
 // Legalize an operand that must be an SGPR by inserting a readfirstlane.
 void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
-    MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const {
+    MachineIRBuilder &B, MachineInstr &MI, unsigned OpIdx) const {
   Register Reg = MI.getOperand(OpIdx).getReg();
+  MachineRegisterInfo &MRI = *B.getMRI();
   const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
   if (Bank == &AMDGPU::SGPRRegBank)
     return;
 
-  MachineIRBuilder B(MI);
-
   Reg = buildReadFirstLane(B, MRI, Reg);
   MI.getOperand(OpIdx).setReg(Reg);
 }
@@ -1048,9 +1044,11 @@
   return LLT::fixed_vector(128 / EltTy.getSizeInBits(), EltTy);
 }
 
-bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
-    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
-    MachineRegisterInfo &MRI) const {
+bool AMDGPURegisterBankInfo::applyMappingLoad(
+    MachineIRBuilder &B,
+    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+    MachineInstr &MI) const {
+  MachineRegisterInfo &MRI = *B.getMRI();
   Register DstReg = MI.getOperand(0).getReg();
   const LLT LoadTy = MRI.getType(DstReg);
   unsigned LoadSize = LoadTy.getSizeInBits();
@@ -1076,8 +1074,7 @@
 
     Register PtrReg = MI.getOperand(1).getReg();
 
-    ApplyRegBankMapping O(*this, MRI, &AMDGPU::SGPRRegBank);
-    MachineIRBuilder B(MI, O);
+    ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
 
     if (LoadSize == 32) {
       // This is an extending load from a sub-dword size. Widen the memory
@@ -1098,10 +1095,7 @@
     // 96-bit loads are only available for vector loads. We need to split this
     // into a 64-bit part, and 32 (unless we can widen to a 128-bit load).
     if (MMO->getAlign() < Align(16)) {
-      MachineFunction *MF = MI.getParent()->getParent();
-      ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
-      MachineIRBuilder B(MI, ApplyBank);
-      LegalizerHelper Helper(*MF, ApplyBank, B);
+      LegalizerHelper Helper(B.getMF(), ApplyBank, B);
       LLT Part64, Part32;
       std::tie(Part64, Part32) = splitUnequalType(LoadTy, 64);
       if (Helper.reduceLoadStoreWidth(cast<GAnyLoad>(MI), 0, Part64) !=
@@ -1144,9 +1138,8 @@
   unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
   const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
-  ApplyRegBankMapping Observer(*this, MRI, &AMDGPU::VGPRRegBank);
-  MachineIRBuilder B(MI, Observer);
-  LegalizerHelper Helper(B.getMF(), Observer, B);
+  ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
+  LegalizerHelper Helper(B.getMF(), O, B);
 
   if (LoadTy.isVector()) {
     if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) !=
         LegalizerHelper::Legalized)
@@ -1161,10 +1154,11 @@
 }
 
 bool AMDGPURegisterBankInfo::applyMappingDynStackAlloc(
-    MachineInstr &MI,
-    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
-    MachineRegisterInfo &MRI) const {
-  const MachineFunction &MF = *MI.getMF();
+    MachineIRBuilder &B,
+    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+    MachineInstr &MI) const {
+  MachineRegisterInfo &MRI = *B.getMRI();
+  const MachineFunction &MF = B.getMF();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const auto &TFI = *ST.getFrameLowering();
 
@@ -1188,8 +1182,7 @@
   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   Register SPReg = Info->getStackPtrOffsetReg();
 
-  ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::SGPRRegBank);
-  MachineIRBuilder B(MI, ApplyBank);
+  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);
 
   auto WaveSize = B.buildConstant(LLT::scalar(32), ST.getWavefrontSizeLog2());
   auto ScaledSize = B.buildShl(IntPtrTy, AllocSize, WaveSize);
@@ -1208,8 +1201,9 @@
 }
 
 bool AMDGPURegisterBankInfo::applyMappingImage(
-    MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
-    MachineRegisterInfo &MRI, int RsrcIdx) const {
+    MachineIRBuilder &B, MachineInstr &MI,
+    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+    int RsrcIdx) const {
   const int NumDefs = MI.getNumExplicitDefs();
 
   // The reported argument index is relative to the IR intrinsic call arguments,
@@ -1230,7 +1224,7 @@
       SGPRIndexes.push_back(I);
   }
 
-  executeInWaterfallLoop(MI, MRI, SGPRIndexes);
+  executeInWaterfallLoop(B, MI, SGPRIndexes);
   return true;
 }
@@ -1320,7 +1314,7 @@
 }
 
 bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
-    const OperandsMapper &OpdMapper) const {
+    MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
   MachineInstr &MI = OpdMapper.getMI();
   MachineRegisterInfo &MRI = OpdMapper.getMRI();
 
@@ -1350,7 +1344,6 @@
   // immediate offsets.
   const Align Alignment = NumLoads > 1 ?
     Align(16 * NumLoads) : Align(1);
 
-  MachineIRBuilder B(MI);
   MachineFunction &MF = B.getMF();
 
   Register SOffset;
@@ -1421,7 +1414,7 @@
     OpsToWaterfall.insert(RSrc);
     executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
-                           OpsToWaterfall, MRI);
+                           OpsToWaterfall);
   }
 
   if (NumLoads != 1) {
@@ -1438,7 +1431,8 @@
   return true;
 }
 
-bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
+bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
+                                             const OperandsMapper &OpdMapper,
                                              bool Signed) const {
   MachineInstr &MI = OpdMapper.getMI();
   MachineRegisterInfo &MRI = OpdMapper.getMRI();
@@ -1464,8 +1458,7 @@
     // There is no 64-bit vgpr bitfield extract instructions so the operation
     // is expanded to a sequence of instructions that implement the operation.
-    ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::VGPRRegBank);
-    MachineIRBuilder B(MI, ApplyBank);
+    ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);
 
     const LLT S64 = LLT::scalar(64);
     // Shift the source operand so that extracted bits start at bit 0.
@@ -1517,8 +1510,7 @@
 
   // The scalar form packs the offset and width in a single operand.
 
-  ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::SGPRRegBank);
-  MachineIRBuilder B(MI, ApplyBank);
+  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);
 
   // Ensure the high bits are clear to insert the offset.
   auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
@@ -1546,7 +1538,7 @@
 }
 
 bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
-    const OperandsMapper &OpdMapper) const {
+    MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
   MachineInstr &MI = OpdMapper.getMI();
   MachineRegisterInfo &MRI = OpdMapper.getMRI();
 
@@ -1575,8 +1567,6 @@
   }
 
   // Keep the multiplication on the SALU.
-  MachineIRBuilder B(MI);
-
   Register DstHi;
   Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0);
   bool MulHiInVgpr = false;
@@ -1916,8 +1906,9 @@
 }
 
 bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
-    MachineInstr &MI, MachineRegisterInfo &MRI,
-    const OperandsMapper &OpdMapper) const {
+    MachineIRBuilder &B, MachineInstr &MI,
+    const OperandsMapper &OpdMapper) const {
+  MachineRegisterInfo &MRI = *B.getMRI();
   Register VecReg = MI.getOperand(1).getReg();
   Register Idx = MI.getOperand(2).getReg();
 
@@ -1935,7 +1926,6 @@
                                   IsDivergentIdx, &Subtarget))
     return false;
 
-  MachineIRBuilder B(MI);
   LLT S32 = LLT::scalar(32);
 
   const RegisterBank &DstBank =
@@ -2014,9 +2004,10 @@
 }
 
 bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
-    MachineInstr &MI, MachineRegisterInfo &MRI,
-    const OperandsMapper &OpdMapper) const {
+    MachineIRBuilder &B, MachineInstr &MI,
+    const OperandsMapper &OpdMapper) const {
+  MachineRegisterInfo &MRI = *B.getMRI();
   Register VecReg = MI.getOperand(1).getReg();
   Register Idx = MI.getOperand(3).getReg();
 
@@ -2033,7 +2024,6 @@
                                   IsDivergentIdx, &Subtarget))
     return false;
 
-  MachineIRBuilder B(MI);
   LLT S32 = LLT::scalar(32);
 
   const RegisterBank &DstBank =
@@ -2103,8 +2093,9 @@
 }
 
 void AMDGPURegisterBankInfo::applyMappingImpl(
-    const OperandsMapper &OpdMapper) const {
+    MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
   MachineInstr &MI = OpdMapper.getMI();
+  B.setInstrAndDebugLoc(MI);
   unsigned Opc = MI.getOpcode();
   MachineRegisterInfo &MRI = OpdMapper.getMRI();
   switch (Opc) {
@@ -2123,7 +2114,6 @@
     if (DefRegs.empty())
       DefRegs.push_back(DstReg);
 
-    MachineIRBuilder B(MI);
     B.setInsertPt(*MI.getParent(), ++MI.getIterator());
 
     Register NewDstReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
@@ -2156,8 +2146,6 @@
       // produce an invalid copy. We can only copy with some kind of compare to
       // get a vector boolean result. Insert a register bank copy that will be
       // correctly lowered to a compare.
-      MachineIRBuilder B(*MI.getParent()->getParent());
-
       for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
         Register SrcReg = MI.getOperand(I).getReg();
         const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
@@ -2179,10 +2167,9 @@
       substituteSimpleCopyRegs(OpdMapper, 0);
 
       // Promote SGPR/VGPR booleans to s32
-      MachineFunction *MF = MI.getParent()->getParent();
-      ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
-      MachineIRBuilder B(MI, ApplyBank);
-      LegalizerHelper Helper(*MF, ApplyBank, B);
+      ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
+      B.setInsertPt(B.getMBB(), MI);
+      LegalizerHelper Helper(B.getMF(), ApplyBank, B);
 
       if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
         llvm_unreachable("widen scalar should have succeeded");
@@ -2212,7 +2199,6 @@
     Register NewDstReg = MRI.createGenericVirtualRegister(S32);
     MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
     MI.getOperand(BoolDstOp).setReg(NewDstReg);
-    MachineIRBuilder B(MI);
 
     if (HasCarryIn) {
       Register NewSrcReg = MRI.createGenericVirtualRegister(S32);
@@ -2245,7 +2231,6 @@
     const RegisterBank *CondBank = getRegBank(CondRegs[0], MRI, *TRI);
     if (CondBank == &AMDGPU::SGPRRegBank) {
-      MachineIRBuilder B(MI);
       const LLT S32 = LLT::scalar(32);
       Register NewCondReg = MRI.createGenericVirtualRegister(S32);
       MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
@@ -2257,7 +2242,6 @@
     if (DstTy.getSizeInBits() != 64)
       break;
 
-    MachineIRBuilder B(MI);
     LLT HalfTy = getHalfSizedType(DstTy);
 
     SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
@@ -2297,7 +2281,6 @@
       OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
 
     if (CondBank == &AMDGPU::SGPRRegBank) {
-      MachineIRBuilder B(MI);
       const LLT S32 = LLT::scalar(32);
       Register NewCondReg = MRI.createGenericVirtualRegister(S32);
       MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
@@ -2324,8 +2307,7 @@
       break;
 
     MachineFunction *MF = MI.getParent()->getParent();
-    ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
-    MachineIRBuilder B(MI, ApplyBank);
+    ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
     LegalizerHelper Helper(*MF, ApplyBank, B);
 
     if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
@@ -2355,7 +2337,6 @@
     // Depending on where the source registers came from, the generic code may
    // have decided to split the inputs already or not. If not, we still need to
     // extract the values.
-    MachineIRBuilder B(MI);
 
     if (Src0Regs.empty())
       split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
@@ -2384,8 +2365,7 @@
     // max combination.
     if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
       MachineFunction *MF = MI.getParent()->getParent();
-      ApplyRegBankMapping Apply(*this, MRI, &AMDGPU::VGPRRegBank);
-      MachineIRBuilder B(MI, Apply);
+      ApplyRegBankMapping Apply(B, *this, MRI, &AMDGPU::VGPRRegBank);
       LegalizerHelper Helper(*MF, Apply, B);
 
       if (Helper.lowerAbsToMaxNeg(MI) != LegalizerHelper::Legalized)
@@ -2420,8 +2400,7 @@
     const LLT S32 = LLT::scalar(32);
     MachineBasicBlock *MBB = MI.getParent();
     MachineFunction *MF = MBB->getParent();
-    ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
-    MachineIRBuilder B(MI, ApplySALU);
+    ApplyRegBankMapping ApplySALU(B, *this, MRI, &AMDGPU::SGPRRegBank);
 
     if (DstTy.isVector()) {
       Register WideSrc0Lo, WideSrc0Hi;
@@ -2459,10 +2438,7 @@
       break; // Nothing to repair
 
     const LLT S32 = LLT::scalar(32);
-    MachineIRBuilder B(MI);
-    ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
-    GISelObserverWrapper Observer(&O);
-    B.setChangeObserver(Observer);
+    ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
 
     // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
     // we would need to further expand, and doesn't let us directly set the
@@ -2508,8 +2484,7 @@
     if (Ty == S32)
       break;
 
-    ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
-    MachineIRBuilder B(MI, ApplyVALU);
+    ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);
     MachineFunction &MF = B.getMF();
     LegalizerHelper Helper(MF, ApplyVALU, B);
 
@@ -2539,8 +2514,7 @@
     // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
     // (ffbh hi:lo) -> (umin (ffbh hi), (uaddsat (ffbh lo), 32))
     // (ffbl hi:lo) -> (umin (uaddsat (ffbh hi), 32), (ffbh lo))
-    ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
-    MachineIRBuilder B(MI, ApplyVALU);
+    ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);
     SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
     unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
                           ? (unsigned)AMDGPU::G_AMDGPU_FFBH_U32
@@ -2569,7 +2543,6 @@
     assert(OpdMapper.getVRegs(1).empty());
 
-    MachineIRBuilder B(MI);
     const RegisterBank *SrcBank =
         OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
@@ -2654,11 +2627,9 @@
     LLT DstTy = MRI.getType(DstReg);
     LLT SrcTy = MRI.getType(SrcReg);
 
-    if (foldExtractEltToCmpSelect(MI, MRI, OpdMapper))
+    if (foldExtractEltToCmpSelect(B, MI, OpdMapper))
       return;
 
-    MachineIRBuilder B(MI);
-
     const ValueMapping &DstMapping =
         OpdMapper.getInstrMapping().getOperandMapping(0);
     const RegisterBank *DstBank = DstMapping.BreakDown[0].RegBank;
@@ -2693,7 +2664,7 @@
     if (DstRegs.empty()) {
       applyDefaultMapping(OpdMapper);
-      executeInWaterfallLoop(MI, MRI, { 2 });
+      executeInWaterfallLoop(B, MI, {2});
 
       if (NeedCopyToVGPR) {
         // We don't want a phi for this temporary reg.
@@ -2752,7 +2723,7 @@
     B.setInstr(*Span.begin());
     MI.eraseFromParent();
     executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
-                           OpsToWaterfall, MRI);
+                           OpsToWaterfall);
 
     if (NeedCopyToVGPR) {
       MachineBasicBlock *LoopBB = Extract1->getParent();
@@ -2787,7 +2758,7 @@
     if (substituteSimpleCopyRegs(OpdMapper, 1))
       MRI.setType(MI.getOperand(1).getReg(), VecTy);
 
-    if (foldInsertEltToCmpSelect(MI, MRI, OpdMapper))
+    if (foldInsertEltToCmpSelect(B, MI, OpdMapper))
      return;
 
     const RegisterBank *IdxBank =
@@ -2817,24 +2788,21 @@
     if (InsRegs.empty()) {
-      executeInWaterfallLoop(MI, MRI, { 3 });
+      executeInWaterfallLoop(B, MI, {3});
 
       // Re-insert the constant offset add inside the waterfall loop.
       if (ShouldMoveIndexIntoLoop) {
-        MachineIRBuilder B(MI);
         reinsertVectorIndexAdd(B, MI, 3, ConstOffset);
       }
 
       return;
     }
-
     assert(InsTy.getSizeInBits() == 64);
 
     const LLT S32 = LLT::scalar(32);
     LLT Vec32 = LLT::fixed_vector(2 * VecTy.getNumElements(), 32);
 
-    MachineIRBuilder B(MI);
     auto CastSrc = B.buildBitcast(Vec32, SrcReg);
     auto One = B.buildConstant(S32, 1);
 
@@ -2881,7 +2849,7 @@
     // Figure out the point after the waterfall loop before mangling the control
     // flow.
     executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
-                           OpsToWaterfall, MRI);
+                           OpsToWaterfall);
 
     // The insertion point is now right after the original instruction.
     //
@@ -2913,7 +2881,7 @@
   case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
   case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
     applyDefaultMapping(OpdMapper);
-    executeInWaterfallLoop(MI, MRI, {1, 4});
+    executeInWaterfallLoop(B, MI, {1, 4});
     return;
   }
   case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
@@ -2929,23 +2897,23 @@
   case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
   case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
     applyDefaultMapping(OpdMapper);
-    executeInWaterfallLoop(MI, MRI, {2, 5});
+    executeInWaterfallLoop(B, MI, {2, 5});
     return;
   }
   case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
   case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
   case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
     applyDefaultMapping(OpdMapper);
-    executeInWaterfallLoop(MI, MRI, {2, 5});
+    executeInWaterfallLoop(B, MI, {2, 5});
     return;
   }
   case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
     applyDefaultMapping(OpdMapper);
-    executeInWaterfallLoop(MI, MRI, {3, 6});
+    executeInWaterfallLoop(B, MI, {3, 6});
     return;
   }
   case AMDGPU::G_AMDGPU_S_BUFFER_LOAD: {
-    applyMappingSBufferLoad(OpdMapper);
+    applyMappingSBufferLoad(B, OpdMapper);
     return;
   }
   case AMDGPU::G_INTRINSIC: {
@@ -2958,7 +2926,7 @@
       // Make sure the index is an SGPR. It doesn't make sense to run this in a
      // waterfall loop, so assume it's a uniform value.
-      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
+      constrainOpWithReadfirstlane(B, MI, 3); // Index
      return;
     }
     case Intrinsic::amdgcn_writelane: {
@@ -2967,8 +2935,8 @@
      assert(OpdMapper.getVRegs(3).empty());
 
       substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
-      constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
-      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
+      constrainOpWithReadfirstlane(B, MI, 2); // Source value
+      constrainOpWithReadfirstlane(B, MI, 3); // Index
       return;
     }
     case Intrinsic::amdgcn_interp_p1:
@@ -2981,7 +2949,7 @@
       // Readlane for m0 value, which is always the last operand.
       // FIXME: Should this be a waterfall loop instead?
-      constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index
+      constrainOpWithReadfirstlane(B, MI, MI.getNumOperands() - 1); // Index
       return;
     }
     case Intrinsic::amdgcn_interp_inreg_p10:
@@ -2995,19 +2963,19 @@
       // Doing a waterfall loop over these wouldn't make any sense.
       substituteSimpleCopyRegs(OpdMapper, 2);
       substituteSimpleCopyRegs(OpdMapper, 3);
-      constrainOpWithReadfirstlane(MI, MRI, 4);
-      constrainOpWithReadfirstlane(MI, MRI, 5);
+      constrainOpWithReadfirstlane(B, MI, 4);
+      constrainOpWithReadfirstlane(B, MI, 5);
       return;
     }
     case Intrinsic::amdgcn_sbfe:
-      applyMappingBFE(OpdMapper, true);
+      applyMappingBFE(B, OpdMapper, true);
      return;
     case Intrinsic::amdgcn_ubfe:
-      applyMappingBFE(OpdMapper, false);
+      applyMappingBFE(B, OpdMapper, false);
       return;
     case Intrinsic::amdgcn_inverse_ballot:
       applyDefaultMapping(OpdMapper);
-      constrainOpWithReadfirstlane(MI, MRI, 2); // Mask
+      constrainOpWithReadfirstlane(B, MI, 2); // Mask
       return;
     case Intrinsic::amdgcn_ballot:
       // Use default handling and insert copy to vcc source.
@@ -3025,13 +2993,13 @@
       // Non-images can have complications from operands that allow both SGPR
      // and VGPR. For now it's too complicated to figure out the final opcode
      // to derive the register bank from the MCInstrDesc.
-      applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
+      applyMappingImage(B, MI, OpdMapper, RSrcIntrin->RsrcArg);
       return;
     }
     case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
       unsigned N = MI.getNumExplicitOperands() - 2;
       applyDefaultMapping(OpdMapper);
-      executeInWaterfallLoop(MI, MRI, { N });
+      executeInWaterfallLoop(B, MI, {N});
       return;
     }
     case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
@@ -3042,7 +3010,7 @@
      // This is only allowed to execute with 1 lane, so readfirstlane is safe.
      assert(OpdMapper.getVRegs(0).empty());
       substituteSimpleCopyRegs(OpdMapper, 3);
-      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+      constrainOpWithReadfirstlane(B, MI, 2); // M0
       return;
     }
     case Intrinsic::amdgcn_ds_gws_init:
@@ -3050,61 +3018,61 @@
     case Intrinsic::amdgcn_ds_gws_sema_br: {
       // Only the first lane is executes, so readfirstlane is safe.
       substituteSimpleCopyRegs(OpdMapper, 1);
-      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+      constrainOpWithReadfirstlane(B, MI, 2); // M0
       return;
     }
     case Intrinsic::amdgcn_ds_gws_sema_v:
     case Intrinsic::amdgcn_ds_gws_sema_p:
     case Intrinsic::amdgcn_ds_gws_sema_release_all: {
       // Only the first lane is executes, so readfirstlane is safe.
-      constrainOpWithReadfirstlane(MI, MRI, 1); // M0
+      constrainOpWithReadfirstlane(B, MI, 1); // M0
       return;
     }
     case Intrinsic::amdgcn_ds_append:
     case Intrinsic::amdgcn_ds_consume: {
-      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+      constrainOpWithReadfirstlane(B, MI, 2); // M0
       return;
     }
     case Intrinsic::amdgcn_s_sendmsg:
     case Intrinsic::amdgcn_s_sendmsghalt: {
       // FIXME: Should this use a waterfall loop?
-      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+      constrainOpWithReadfirstlane(B, MI, 2); // M0
       return;
     }
     case Intrinsic::amdgcn_s_setreg: {
-      constrainOpWithReadfirstlane(MI, MRI, 2);
+      constrainOpWithReadfirstlane(B, MI, 2);
       return;
     }
     case Intrinsic::amdgcn_raw_buffer_load_lds:
     case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
       applyDefaultMapping(OpdMapper);
-      constrainOpWithReadfirstlane(MI, MRI, 1); // rsrc
-      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
-      constrainOpWithReadfirstlane(MI, MRI, 5); // soffset
+      constrainOpWithReadfirstlane(B, MI, 1); // rsrc
+      constrainOpWithReadfirstlane(B, MI, 2); // M0
+      constrainOpWithReadfirstlane(B, MI, 5); // soffset
       return;
     }
     case Intrinsic::amdgcn_struct_buffer_load_lds:
     case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
       applyDefaultMapping(OpdMapper);
-      constrainOpWithReadfirstlane(MI, MRI, 1); // rsrc
-      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
-      constrainOpWithReadfirstlane(MI, MRI, 6); // soffset
+      constrainOpWithReadfirstlane(B, MI, 1); // rsrc
+      constrainOpWithReadfirstlane(B, MI, 2); // M0
+      constrainOpWithReadfirstlane(B, MI, 6); // soffset
       return;
     }
     case Intrinsic::amdgcn_global_load_lds: {
       applyDefaultMapping(OpdMapper);
-      constrainOpWithReadfirstlane(MI, MRI, 2);
+      constrainOpWithReadfirstlane(B, MI, 2);
       return;
     }
     case Intrinsic::amdgcn_lds_direct_load: {
       applyDefaultMapping(OpdMapper);
       // Readlane for m0 value, which is always the last operand.
-      constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index
+      constrainOpWithReadfirstlane(B, MI, MI.getNumOperands() - 1); // Index
       return;
     }
     case Intrinsic::amdgcn_exp_row:
       applyDefaultMapping(OpdMapper);
-      constrainOpWithReadfirstlane(MI, MRI, 8); // M0
+      constrainOpWithReadfirstlane(B, MI, 8); // M0
       return;
     default: {
       if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
@@ -3113,7 +3081,7 @@
         // and VGPR. For now it's too complicated to figure out the final opcode
        // to derive the register bank from the MCInstrDesc.
         if (RSrcIntrin->IsImage) {
-          applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
+          applyMappingImage(B, MI, OpdMapper, RSrcIntrin->RsrcArg);
           return;
         }
       }
@@ -3214,29 +3182,29 @@
     }
     ++End;
 
-    MachineIRBuilder B(*Start);
-    executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs, MRI);
+    B.setInsertPt(B.getMBB(), Start);
+    executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs);
     break;
   }
   case AMDGPU::G_LOAD:
   case AMDGPU::G_ZEXTLOAD:
   case AMDGPU::G_SEXTLOAD: {
-    if (applyMappingLoad(MI, OpdMapper, MRI))
+    if (applyMappingLoad(B, OpdMapper, MI))
      return;
     break;
   }
   case AMDGPU::G_DYN_STACKALLOC:
-    applyMappingDynStackAlloc(MI, OpdMapper, MRI);
+    applyMappingDynStackAlloc(B, OpdMapper, MI);
     return;
   case AMDGPU::G_SBFX:
-    applyMappingBFE(OpdMapper, /*Signed*/ true);
+    applyMappingBFE(B, OpdMapper, /*Signed*/ true);
     return;
   case AMDGPU::G_UBFX:
-    applyMappingBFE(OpdMapper, /*Signed*/ false);
+    applyMappingBFE(B, OpdMapper, /*Signed*/ false);
     return;
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
-    applyMappingMAD_64_32(OpdMapper);
+    applyMappingMAD_64_32(B, OpdMapper);
    return;
   default:
     break;
Index: llvm/lib/Target/Mips/MipsRegisterBankInfo.h
===================================================================
--- llvm/lib/Target/Mips/MipsRegisterBankInfo.h
+++ llvm/lib/Target/Mips/MipsRegisterBankInfo.h
@@ -42,7 +42,8 @@
   /// G_UNMERGE and erase instructions that became dead in the process. We
   /// manually assign bank to def operand of all new instructions that were
   /// created in the process since they will not end up in RegBankSelect loop.
-  void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+  void applyMappingImpl(MachineIRBuilder &Builder,
+                        const OperandsMapper &OpdMapper) const override;
 
   /// RegBankSelect determined that s64 operand is better to be split into two
   /// s32 operands in gprb. Here we manually set register banks of def operands
Index: llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -675,9 +675,15 @@
 namespace {
 class InstManager : public GISelChangeObserver {
   InstListTy &InstList;
+  MachineIRBuilder &B;
 
 public:
-  InstManager(InstListTy &Insts) : InstList(Insts) {}
+  InstManager(MachineIRBuilder &B, InstListTy &Insts) : InstList(Insts), B(B) {
+    assert(!B.isObservingChanges());
+    B.setChangeObserver(*this);
+  }
+
+  ~InstManager() { B.stopObservingChanges(); }
 
   void createdInstr(MachineInstr &MI) override { InstList.insert(&MI); }
   void erasingInstr(MachineInstr &MI) override {}
@@ -724,17 +730,18 @@
 }
 
 void MipsRegisterBankInfo::applyMappingImpl(
-    const OperandsMapper &OpdMapper) const {
+    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
   MachineInstr &MI = OpdMapper.getMI();
+  Builder.setInstrAndDebugLoc(MI);
+
   InstListTy NewInstrs;
   MachineFunction *MF = MI.getMF();
   MachineRegisterInfo &MRI = OpdMapper.getMRI();
   const LegalizerInfo &LegInfo = *MF->getSubtarget().getLegalizerInfo();
 
-  InstManager NewInstrObserver(NewInstrs);
-  MachineIRBuilder B(MI, NewInstrObserver);
-  LegalizerHelper Helper(*MF, NewInstrObserver, B);
-  LegalizationArtifactCombiner ArtCombiner(B, MF->getRegInfo(), LegInfo);
+  InstManager NewInstrObserver(Builder, NewInstrs);
+  LegalizerHelper Helper(*MF, NewInstrObserver, Builder);
+  LegalizationArtifactCombiner ArtCombiner(Builder, MF->getRegInfo(), LegInfo);
 
   switch (MI.getOpcode()) {
   case TargetOpcode::G_LOAD:
Index: llvm/lib/Target/X86/X86RegisterBankInfo.h
===================================================================
--- llvm/lib/Target/X86/X86RegisterBankInfo.h
+++ llvm/lib/Target/X86/X86RegisterBankInfo.h
@@ -71,7 +71,8 @@
   getInstrAlternativeMappings(const MachineInstr &MI) const override;
 
   /// See RegisterBankInfo::applyMapping.
-  void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+  void applyMappingImpl(MachineIRBuilder &Builder,
+                        const OperandsMapper &OpdMapper) const override;
 
   const InstructionMapping &
   getInstrMapping(const MachineInstr &MI) const override;
Index: llvm/lib/Target/X86/X86RegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -272,7 +272,7 @@
 }
 
 void X86RegisterBankInfo::applyMappingImpl(
-    const OperandsMapper &OpdMapper) const {
+    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
   return applyDefaultMapping(OpdMapper);
 }
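The recurring shape in the AMDGPU `ApplyRegBankMapping` and Mips `InstManager` changes above is an RAII observer tied to the shared builder: attach in the constructor (guarded by the new `isObservingChanges()`), detach in the destructor. A reduced sketch of that pattern follows; `TrackNewInsts` is illustrative and not a class added by this patch:

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Illustrative RAII observer in the style of ApplyRegBankMapping/InstManager.
class TrackNewInsts final : public GISelChangeObserver {
  MachineIRBuilder &B;
  SmallVector<MachineInstr *, 4> NewInsts;

public:
  TrackNewInsts(MachineIRBuilder &B) : B(B) {
    // Exactly one observer may be installed on the shared builder at a time;
    // the new isObservingChanges() makes that invariant checkable.
    assert(!B.isObservingChanges());
    B.setChangeObserver(*this);
  }

  // Detach before the pass-owned builder outlives this observer.
  ~TrackNewInsts() { B.stopObservingChanges(); }

  void createdInstr(MachineInstr &MI) override { NewInsts.push_back(&MI); }
  void erasingInstr(MachineInstr &MI) override {}
  void changingInstr(MachineInstr &MI) override {}
  void changedInstr(MachineInstr &MI) override {}
};
```

This is why the in-tree observers now take the builder as a constructor argument: attach/detach ownership moves into the observer itself, so a scope exit can no longer leave a dangling observer installed on RegBankSelect's builder.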