Index: llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -127,7 +127,7 @@
         // with vectors of pointers.
         if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
             AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
-         return false;
+          return false;
         }
       }
       return true;
@@ -140,7 +140,8 @@
       if (AddrOp->isImm())
         continue;

-      // Don't try to merge addresses that aren't either immediates or registers.
+      // Don't try to merge addresses that aren't either immediates or
+      // registers.
       // TODO: Should be possible to merge FrameIndexes and maybe some other
       // non-register
       if (!AddrOp->isReg())
@@ -185,14 +186,14 @@
   AliasAnalysis *AA = nullptr;
   bool OptimizeAgain;

-  static bool dmasksCanBeCombined(const CombineInfo &CI,
-                                  const SIInstrInfo &TII,
+  static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII,
                                   const CombineInfo &Paired);
   static bool offsetsCanBeCombined(CombineInfo &CI, const GCNSubtarget &STI,
                                    CombineInfo &Paired, bool Modify = false);
   static bool widthsFit(const GCNSubtarget &STI, const CombineInfo &CI,
                         const CombineInfo &Paired);
-  static unsigned getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired);
+  static unsigned getNewOpcode(const CombineInfo &CI,
+                               const CombineInfo &Paired);
   static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI,
                                                      const CombineInfo &Paired);
   const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI,
@@ -204,9 +205,9 @@

   unsigned read2Opcode(unsigned EltSize) const;
   unsigned read2ST64Opcode(unsigned EltSize) const;
-  MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI,
-                                             CombineInfo &Paired,
-                                             const SmallVectorImpl<MachineInstr *> &InstsToMove);
+  MachineBasicBlock::iterator
+  mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
+                 const SmallVectorImpl<MachineInstr *> &InstsToMove);

   unsigned write2Opcode(unsigned EltSize) const;
   unsigned write2ST64Opcode(unsigned EltSize) const;
@@ -237,15 +238,17 @@
   Register computeBase(MachineInstr &MI, const MemAddress &Addr) const;
   MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI) const;
   Optional<int32_t> extractConstOffset(const MachineOperand &Op) const;
-  void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr) const;
+  void processBaseWithConstOffset(const MachineOperand &Base,
+                                  MemAddress &Addr) const;

   /// Promotes constant offset to the immediate by adjusting the base. It
   /// tries to use a base from the nearby instructions that allows it to have
   /// a 13bit constant offset which gets promoted to the immediate.
-  bool promoteConstantOffsetToImm(MachineInstr &CI,
-                                  MemInfoMap &Visited,
-                                  SmallPtrSet<MachineInstr *, 4> &Promoted) const;
-  void addInstToMergeableList(const CombineInfo &CI,
-                  std::list<std::list<CombineInfo> > &MergeableInsts) const;
+  bool
+  promoteConstantOffsetToImm(MachineInstr &CI, MemInfoMap &Visited,
+                             SmallPtrSet<MachineInstr *, 4> &Promoted) const;
+  void addInstToMergeableList(
+      const CombineInfo &CI,
+      std::list<std::list<CombineInfo>> &MergeableInsts) const;
   std::pair<MachineBasicBlock::iterator, bool> collectMergeableInsts(
       MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End,
@@ -261,7 +264,7 @@

   bool optimizeInstsWithSameBaseAddr(std::list<CombineInfo> &MergeList,
                                      bool &OptimizeListAgain);
-  bool optimizeBlock(std::list<std::list<CombineInfo> > &MergeableInsts);
+  bool optimizeBlock(std::list<std::list<CombineInfo>> &MergeableInsts);

   bool runOnMachineFunction(MachineFunction &MF) override;

@@ -275,8 +278,8 @@
   }

   MachineFunctionProperties getRequiredProperties() const override {
-    return MachineFunctionProperties()
-      .set(MachineFunctionProperties::Property::IsSSA);
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
   }
 };

@@ -305,14 +308,20 @@
     return 4;
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
     return 8;
-  case AMDGPU::DS_READ_B32: LLVM_FALLTHROUGH;
-  case AMDGPU::DS_READ_B32_gfx9: LLVM_FALLTHROUGH;
-  case AMDGPU::DS_WRITE_B32: LLVM_FALLTHROUGH;
+  case AMDGPU::DS_READ_B32:
+    LLVM_FALLTHROUGH;
+  case AMDGPU::DS_READ_B32_gfx9:
+    LLVM_FALLTHROUGH;
+  case AMDGPU::DS_WRITE_B32:
+    LLVM_FALLTHROUGH;
   case AMDGPU::DS_WRITE_B32_gfx9:
     return 1;
-  case AMDGPU::DS_READ_B64: LLVM_FALLTHROUGH;
-  case AMDGPU::DS_READ_B64_gfx9: LLVM_FALLTHROUGH;
-  case AMDGPU::DS_WRITE_B64: LLVM_FALLTHROUGH;
+  case AMDGPU::DS_READ_B64:
+    LLVM_FALLTHROUGH;
+  case AMDGPU::DS_READ_B64_gfx9:
+    LLVM_FALLTHROUGH;
+  case AMDGPU::DS_WRITE_B64:
+    LLVM_FALLTHROUGH;
   case AMDGPU::DS_WRITE_B64_gfx9:
     return 2;
   default:
@@ -495,14 +504,13 @@
   switch (InstClass) {
   case DS_READ:
-    EltSize =
-        (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8
-                                                                        : 4;
-    break;
-  case DS_WRITE:
     EltSize =
-        (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8
-                                                                          : 4;
+        (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 : 4;
+    break;
+  case DS_WRITE:
+    EltSize = (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9)
+                  ? 8
+                  : 4;
     break;
   case S_BUFFER_LOAD_IMM:
     EltSize = AMDGPU::convertSMRDOffsetUnits(STM, 4);
     break;
@@ -679,9 +687,9 @@
     return false;

   // Check other optional immediate operands for equality.
-  unsigned OperandsToMatch[] = {AMDGPU::OpName::cpol,  AMDGPU::OpName::d16,
+  unsigned OperandsToMatch[] = {AMDGPU::OpName::cpol, AMDGPU::OpName::d16,
                                 AMDGPU::OpName::unorm, AMDGPU::OpName::da,
-                                AMDGPU::OpName::r128,  AMDGPU::OpName::a16};
+                                AMDGPU::OpName::r128, AMDGPU::OpName::a16};

   for (auto op : OperandsToMatch) {
     int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
@@ -704,8 +712,8 @@
 }

 static unsigned getBufferFormatWithCompCount(unsigned OldFormat,
-                                        unsigned ComponentCount,
-                                        const GCNSubtarget &STI) {
+                                             unsigned ComponentCount,
+                                             const GCNSubtarget &STI) {
   if (ComponentCount > 4)
     return 0;

@@ -773,7 +781,8 @@
     if (Info0->BitsPerComp != 32)
       return false;

-    if (getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, STI) == 0)
+    if (getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, STI) ==
+        0)
       return false;
   }

@@ -1044,8 +1053,8 @@
                         : AMDGPU::DS_READ2ST64_B64_gfx9;
 }

-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
+    CombineInfo &CI, CombineInfo &Paired,
     const SmallVectorImpl<MachineInstr *> &InstsToMove) {
   MachineBasicBlock *MBB = CI.I->getParent();

@@ -1143,9 +1152,9 @@
                          : AMDGPU::DS_WRITE2ST64_B64_gfx9;
 }

-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
-    const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
+    CombineInfo &CI, CombineInfo &Paired,
+    const SmallVectorImpl<MachineInstr *> &InstsToMove) {
   MachineBasicBlock *MBB = CI.I->getParent();

   // Be sure to use .addOperand(), and not .addReg() with these. We want to be
@@ -1211,9 +1220,9 @@
   return Write2;
 }

-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
-    const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeImagePair(
+    CombineInfo &CI, CombineInfo &Paired,
+    const SmallVectorImpl<MachineInstr *> &InstsToMove) {
   MachineBasicBlock *MBB = CI.I->getParent();
   DebugLoc DL = CI.I->getDebugLoc();
   const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1241,7 +1250,8 @@
   const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
   const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();

-  MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+  MachineInstr *New = MIB.addMemOperand(
+      combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

   unsigned SubRegIdx0, SubRegIdx1;
   std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired);
@@ -1286,11 +1296,12 @@
   const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();

   MachineInstr *New =
-    BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg)
-        .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
-        .addImm(MergedOffset) // offset
-        .addImm(CI.CPol)      // cpol
-        .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+      BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg)
+          .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
+          .addImm(MergedOffset) // offset
+          .addImm(CI.CPol)      // cpol
+          .addMemOperand(
+              combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
   const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1345,13 +1356,14 @@
   const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();

   MachineInstr *New =
-    MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
-        .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
-        .addImm(MergedOffset) // offset
-        .addImm(CI.CPol)      // cpol
-        .addImm(0)            // tfe
-        .addImm(0)            // swz
-        .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+      MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
+          .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
+          .addImm(MergedOffset) // offset
+          .addImm(CI.CPol)      // cpol
+          .addImm(0)            // tfe
+          .addImm(0)            // swz
+          .addMemOperand(
+              combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
   const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1490,10 +1502,10 @@
       MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
           .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
           .addImm(std::min(CI.Offset, Paired.Offset)) // offset
-          .addImm(JoinedFormat)                     // format
-          .addImm(CI.CPol)                          // cpol
-          .addImm(0)                                // tfe
-          .addImm(0)                                // swz
+          .addImm(JoinedFormat)                       // format
+          .addImm(CI.CPol)                            // cpol
+          .addImm(0)                                  // tfe
+          .addImm(0)                                  // swz
           .addMemOperand(
               combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

@@ -1569,7 +1581,8 @@
     }
   } else {
     static const unsigned Idxs[4][4] = {
-        {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
+        {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2,
+         AMDGPU::sub0_sub1_sub2_sub3},
         {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0},
         {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0},
         {AMDGPU::sub3, 0, 0, 0},
@@ -1647,7 +1660,6 @@
   if (Regs.VAddr)
     MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));

-
   // It shouldn't be possible to get this far if the two instructions
   // don't have a single memoperand, because MachineInstr::mayAlias()
   // will return true if this is the case.
@@ -1657,13 +1669,14 @@
   const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();

   MachineInstr *New =
-    MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
-        .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
-        .addImm(std::min(CI.Offset, Paired.Offset)) // offset
-        .addImm(CI.CPol) // cpol
-        .addImm(0) // tfe
-        .addImm(0) // swz
-        .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+      MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
+          .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
+          .addImm(std::min(CI.Offset, Paired.Offset)) // offset
+          .addImm(CI.CPol)                            // cpol
+          .addImm(0)                                  // tfe
+          .addImm(0)                                  // swz
+          .addMemOperand(
+              combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

   moveInstsAfter(MIB, InstsToMove);

@@ -1672,17 +1685,17 @@
   return New;
 }

-MachineOperand
-SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) const {
+MachineOperand SILoadStoreOptimizer::createRegOrImm(int32_t Val,
+                                                    MachineInstr &MI) const {
   APInt V(32, Val, true);
   if (TII->isInlineConstant(V))
     return MachineOperand::CreateImm(Val);

   Register Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
   MachineInstr *Mov =
-  BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
-          TII->get(AMDGPU::S_MOV_B32), Reg)
-    .addImm(Val);
+      BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+              TII->get(AMDGPU::S_MOV_B32), Reg)
+          .addImm(Val);
   (void)Mov;
   LLVM_DEBUG(dbgs() << "  "; Mov->dump());
   return MachineOperand::CreateReg(Reg, false);
@@ -1704,9 +1717,10 @@
          "Expected 32-bit Base-Register-Hi!!");

   LLVM_DEBUG(dbgs() << "  Re-Computed Anchor-Base:\n");
-  MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
+  MachineOperand OffsetLo =
+      createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
   MachineOperand OffsetHi =
-    createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
+      createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);

   const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
   Register CarryReg = MRI->createVirtualRegister(CarryRC);
@@ -1715,31 +1729,31 @@
   Register DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   Register DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   MachineInstr *LoHalf =
-    BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_CO_U32_e64), DestSub0)
-      .addReg(CarryReg, RegState::Define)
-      .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
-      .add(OffsetLo)
-      .addImm(0); // clamp bit
+      BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_CO_U32_e64), DestSub0)
+          .addReg(CarryReg, RegState::Define)
+          .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
+          .add(OffsetLo)
+          .addImm(0); // clamp bit
   (void)LoHalf;
   LLVM_DEBUG(dbgs() << "  "; LoHalf->dump(););

   MachineInstr *HiHalf =
-    BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADDC_U32_e64), DestSub1)
-      .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
-      .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
-      .add(OffsetHi)
-      .addReg(CarryReg, RegState::Kill)
-      .addImm(0); // clamp bit
+      BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADDC_U32_e64), DestSub1)
+          .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
+          .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
+          .add(OffsetHi)
+          .addReg(CarryReg, RegState::Kill)
+          .addImm(0); // clamp bit
   (void)HiHalf;
   LLVM_DEBUG(dbgs() << "  "; HiHalf->dump(););

   Register FullDestReg = MRI->createVirtualRegister(TRI->getVGPR64Class());
   MachineInstr *FullBase =
-    BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg)
-      .addReg(DestSub0)
-      .addImm(AMDGPU::sub0)
-      .addReg(DestSub1)
-      .addImm(AMDGPU::sub1);
+      BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+          .addReg(DestSub0)
+          .addImm(AMDGPU::sub0)
+          .addReg(DestSub1)
+          .addImm(AMDGPU::sub1);
   (void)FullBase;
   LLVM_DEBUG(dbgs() << "  "; FullBase->dump(); dbgs() << "\n";);

@@ -1782,14 +1796,14 @@
 // %HI:vgpr_32, = V_ADDC_U32_e64 %BASE_HI:vgpr_32, 0, killed %c:sreg_64_xexec
 // %Base:vreg_64 =
 // REG_SEQUENCE %LO:vgpr_32, %subreg.sub0, %HI:vgpr_32, %subreg.sub1
-void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base,
-                                                      MemAddress &Addr) const {
+void SILoadStoreOptimizer::processBaseWithConstOffset(
+    const MachineOperand &Base, MemAddress &Addr) const {
   if (!Base.isReg())
     return;

   MachineInstr *Def = MRI->getUniqueVRegDef(Base.getReg());
-  if (!Def || Def->getOpcode() != AMDGPU::REG_SEQUENCE
-      || Def->getNumOperands() != 5)
+  if (!Def || Def->getOpcode() != AMDGPU::REG_SEQUENCE ||
+      Def->getNumOperands() != 5)
     return;

   MachineOperand BaseLo = Def->getOperand(1);
@@ -1836,8 +1850,7 @@
 }

 bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
-    MachineInstr &MI,
-    MemInfoMap &Visited,
+    MachineInstr &MI, MemInfoMap &Visited,
     SmallPtrSet<MachineInstr *, 4> &AnchorList) const {
   if (!(MI.mayLoad() ^ MI.mayStore()))
     return false;
@@ -1876,7 +1889,8 @@
   }

   LLVM_DEBUG(dbgs() << "  BASE: {" << MAddr.Base.HiReg << ", "
-                    << MAddr.Base.LoReg << "} Offset: " << MAddr.Offset << "\n\n";);
+                    << MAddr.Base.LoReg << "} Offset: " << MAddr.Offset
+                    << "\n\n";);

   // Step2: Traverse through MI's basic block and find an anchor(that has the
   // same base-registers) with the highest 13bit distance from MI's offset.
@@ -1913,9 +1927,9 @@
   MachineBasicBlock::iterator MBBI = MI.getIterator();
   ++MBBI;
   const SITargetLowering *TLI =
-    static_cast<const SITargetLowering *>(STM->getTargetLowering());
+      static_cast<const SITargetLowering *>(STM->getTargetLowering());

-  for ( ; MBBI != E; ++MBBI) {
+  for (; MBBI != E; ++MBBI) {
     MachineInstr &MINext = *MBBI;
     // TODO: Support finding an anchor(with same base) from store addresses or
     // any other load addresses where the opcodes are different.
@@ -1924,7 +1938,7 @@
       continue;

     const MachineOperand &BaseNext =
-      *TII->getNamedOperand(MINext, AMDGPU::OpName::vaddr);
+        *TII->getNamedOperand(MINext, AMDGPU::OpName::vaddr);
     MemAddress MAddrNext;
     if (Visited.find(&MINext) == Visited.end()) {
       processBaseWithConstOffset(BaseNext, MAddrNext);
@@ -1956,8 +1970,8 @@
   if (AnchorInst) {
     LLVM_DEBUG(dbgs() << "  Anchor-Inst(with max-distance from Offset): ";
               AnchorInst->dump());
-    LLVM_DEBUG(dbgs() << "  Anchor-Offset from BASE: "
-               << AnchorAddr.Offset << "\n\n");
+    LLVM_DEBUG(dbgs() << "  Anchor-Offset from BASE: " << AnchorAddr.Offset
+                      << "\n\n");

    // Instead of moving up, just re-compute anchor-instruction's base address.
     Register Base = computeBase(MI, AnchorAddr);
@@ -1971,8 +1985,8 @@
       AM.BaseOffs = P.second - AnchorAddr.Offset;

       if (TLI->isLegalGlobalAddressingMode(AM)) {
-        LLVM_DEBUG(dbgs() << "  Promote Offset(" << P.second;
-                   dbgs() << ")"; P.first->dump());
+        LLVM_DEBUG(dbgs() << "  Promote Offset(" << P.second; dbgs() << ")";
+                   P.first->dump());
         updateBaseAndOffset(*P.first, Base, P.second - AnchorAddr.Offset);
         LLVM_DEBUG(dbgs() << "  After promotion: "; P.first->dump());
       }
@@ -1984,8 +1998,9 @@
   return false;
 }

-void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI,
-                 std::list<std::list<CombineInfo> > &MergeableInsts) const {
+void SILoadStoreOptimizer::addInstToMergeableList(
+    const CombineInfo &CI,
+    std::list<std::list<CombineInfo>> &MergeableInsts) const {
   for (std::list<CombineInfo> &AddrList : MergeableInsts) {
     if (AddrList.front().InstClass == CI.InstClass &&
         AddrList.front().hasSameBaseAddress(*CI.I)) {
@@ -2005,14 +2020,15 @@
     std::list<std::list<CombineInfo>> &MergeableInsts) const {
   bool Modified = false;

-  // Sort potential mergeable instructions into lists. One list per base address.
+  // Sort potential mergeable instructions into lists. One list per base
+  // address.
   unsigned Order = 0;
   MachineBasicBlock::iterator BlockI = Begin;
   for (; BlockI != End; ++BlockI) {
     MachineInstr &MI = *BlockI;

-    // We run this before checking if an address is mergeable, because it can produce
-    // better code even if the instructions aren't mergeable.
+    // We run this before checking if an address is mergeable, because it can
+    // produce better code even if the instructions aren't mergeable.
     if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
       Modified = true;

@@ -2045,12 +2061,13 @@
   // At this point we have lists of Mergeable instructions.
   //
   // Part 2: Sort lists by offset and then for each CombineInfo object in the
-  // list try to find an instruction that can be merged with I. If an instruction
-  // is found, it is stored in the Paired field. If no instructions are found, then
-  // the CombineInfo object is deleted from the list.
+  // list try to find an instruction that can be merged with I. If an
+  // instruction is found, it is stored in the Paired field. If no instructions
+  // are found, then the CombineInfo object is deleted from the list.
   for (std::list<std::list<CombineInfo>>::iterator I = MergeableInsts.begin(),
-       E = MergeableInsts.end(); I != E;) {
+                                                   E = MergeableInsts.end();
+       I != E;) {
     std::list<CombineInfo> &MergeList = *I;

     if (MergeList.size() <= 1) {
@@ -2064,10 +2081,9 @@
     // Sort the lists by offsets, this way mergeable instructions will be
     // adjacent to each other in the list, which will make it easier to find
     // matches.
-    MergeList.sort(
-        [] (const CombineInfo &A, CombineInfo &B) {
-          return A.Offset < B.Offset;
-        });
+    MergeList.sort([](CombineInfo const &A, CombineInfo const &B) {
+      return A.Offset < B.Offset;
+    });
     ++I;
   }

@@ -2078,11 +2094,12 @@
 // the same base register. We rely on the scheduler to do the hard work of
 // clustering nearby loads, and assume these are all adjacent.
 bool SILoadStoreOptimizer::optimizeBlock(
-                       std::list<std::list<CombineInfo> > &MergeableInsts) {
+    std::list<std::list<CombineInfo>> &MergeableInsts) {
   bool Modified = false;

   for (std::list<std::list<CombineInfo>>::iterator I = MergeableInsts.begin(),
-       E = MergeableInsts.end(); I != E;) {
+                                                   E = MergeableInsts.end();
+       I != E;) {
     std::list<CombineInfo> &MergeList = *I;

     bool OptimizeListAgain = false;
@@ -2107,10 +2124,8 @@
   return Modified;
 }

-bool
-SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
-    std::list<CombineInfo> &MergeList,
-    bool &OptimizeListAgain) {
+bool SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
+    std::list<CombineInfo> &MergeList, bool &OptimizeListAgain) {
   if (MergeList.empty())
     return false;