Index: llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -127,7 +127,7 @@
         // with vectors of pointers.
         if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
             AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
-         return false;
+          return false;
         }
       }
       return true;
@@ -140,7 +140,8 @@
       if (AddrOp->isImm())
         continue;

-      // Don't try to merge addresses that aren't either immediates or registers.
+      // Don't try to merge addresses that aren't either immediates or
+      // registers.
       // TODO: Should be possible to merge FrameIndexes and maybe some other
       // non-register
       if (!AddrOp->isReg())
@@ -185,14 +186,14 @@
   AliasAnalysis *AA = nullptr;
   bool OptimizeAgain;

-  static bool dmasksCanBeCombined(const CombineInfo &CI,
-                                  const SIInstrInfo &TII,
+  static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII,
                                   const CombineInfo &Paired);
   static bool offsetsCanBeCombined(CombineInfo &CI, const GCNSubtarget &STI,
                                    CombineInfo &Paired, bool Modify = false);
   static bool widthsFit(const GCNSubtarget &STI, const CombineInfo &CI,
                         const CombineInfo &Paired);
-  static unsigned getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired);
+  static unsigned getNewOpcode(const CombineInfo &CI,
+                               const CombineInfo &Paired);
   static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI,
                                                      const CombineInfo &Paired);
   const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI,
@@ -204,9 +205,9 @@

   unsigned read2Opcode(unsigned EltSize) const;
   unsigned read2ST64Opcode(unsigned EltSize) const;
-  MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI,
-                                             CombineInfo &Paired,
-                                             const SmallVectorImpl<MachineInstr *> &InstsToMove);
+  MachineBasicBlock::iterator
+  mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
+                 const SmallVectorImpl<MachineInstr *> &InstsToMove);

   unsigned write2Opcode(unsigned EltSize) const;
   unsigned write2ST64Opcode(unsigned EltSize) const;
@@ -237,15 +238,17 @@
   Register computeBase(MachineInstr &MI, const MemAddress &Addr) const;
   MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI) const;
   Optional<int32_t> extractConstOffset(const MachineOperand &Op) const;
-  void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr) const;
+  void processBaseWithConstOffset(const MachineOperand &Base,
+                                  MemAddress &Addr) const;

   /// Promotes constant offset to the immediate by adjusting the base. It
   /// tries to use a base from the nearby instructions that allows it to have
   /// a 13bit constant offset which gets promoted to the immediate.
-  bool promoteConstantOffsetToImm(MachineInstr &CI,
-                                  MemInfoMap &Visited,
-                                  SmallPtrSet<MachineInstr *, 4> &Promoted) const;
-  void addInstToMergeableList(const CombineInfo &CI,
-                  std::list<std::list<CombineInfo> > &MergeableInsts) const;
+  bool
+  promoteConstantOffsetToImm(MachineInstr &CI, MemInfoMap &Visited,
+                             SmallPtrSet<MachineInstr *, 4> &Promoted) const;
+  void addInstToMergeableList(
+      const CombineInfo &CI,
+      std::list<std::list<CombineInfo>> &MergeableInsts) const;
   std::pair<MachineBasicBlock::iterator, bool> collectMergeableInsts(
       MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End,
@@ -261,7 +264,7 @@

   bool optimizeInstsWithSameBaseAddr(std::list<CombineInfo> &MergeList,
                                      bool &OptimizeListAgain);
-  bool optimizeBlock(std::list<std::list<CombineInfo> > &MergeableInsts);
+  bool optimizeBlock(std::list<std::list<CombineInfo>> &MergeableInsts);

   bool runOnMachineFunction(MachineFunction &MF) override;

@@ -275,8 +278,8 @@
   }

   MachineFunctionProperties getRequiredProperties() const override {
-    return MachineFunctionProperties()
-      .set(MachineFunctionProperties::Property::IsSSA);
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
   }
 };

@@ -305,14 +308,20 @@
     return 4;
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
     return 8;
-  case AMDGPU::DS_READ_B32: LLVM_FALLTHROUGH;
-  case AMDGPU::DS_READ_B32_gfx9: LLVM_FALLTHROUGH;
-  case AMDGPU::DS_WRITE_B32: LLVM_FALLTHROUGH;
+  case AMDGPU::DS_READ_B32:
+    LLVM_FALLTHROUGH;
+  case AMDGPU::DS_READ_B32_gfx9:
+    LLVM_FALLTHROUGH;
+  case AMDGPU::DS_WRITE_B32:
+    LLVM_FALLTHROUGH;
   case AMDGPU::DS_WRITE_B32_gfx9:
     return 1;
-  case AMDGPU::DS_READ_B64: LLVM_FALLTHROUGH;
-  case AMDGPU::DS_READ_B64_gfx9: LLVM_FALLTHROUGH;
-  case AMDGPU::DS_WRITE_B64: LLVM_FALLTHROUGH;
+  case AMDGPU::DS_READ_B64:
+    LLVM_FALLTHROUGH;
+  case AMDGPU::DS_READ_B64_gfx9:
+    LLVM_FALLTHROUGH;
+  case AMDGPU::DS_WRITE_B64:
+    LLVM_FALLTHROUGH;
   case AMDGPU::DS_WRITE_B64_gfx9:
     return 2;
   default:
@@ -495,14 +504,13 @@
   switch (InstClass) {
   case DS_READ:
-    EltSize =
-        (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8
-                                                                        : 4;
-    break;
-  case DS_WRITE:
     EltSize =
-        (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8
-                                                                          : 4;
+        (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 : 4;
+    break;
+  case DS_WRITE:
+    EltSize = (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9)
+                  ? 8
+                  : 4;
     break;
   case S_BUFFER_LOAD_IMM:
     EltSize = AMDGPU::convertSMRDOffsetUnits(STM, 4);
     break;
@@ -679,9 +687,9 @@
     return false;

   // Check other optional immediate operands for equality.
-  unsigned OperandsToMatch[] = {AMDGPU::OpName::cpol,  AMDGPU::OpName::d16,
+  unsigned OperandsToMatch[] = {AMDGPU::OpName::cpol, AMDGPU::OpName::d16,
                                 AMDGPU::OpName::unorm, AMDGPU::OpName::da,
-                                AMDGPU::OpName::r128,  AMDGPU::OpName::a16};
+                                AMDGPU::OpName::r128, AMDGPU::OpName::a16};

   for (auto op : OperandsToMatch) {
     int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
@@ -704,8 +712,8 @@
 }

 static unsigned getBufferFormatWithCompCount(unsigned OldFormat,
-                                        unsigned ComponentCount,
-                                        const GCNSubtarget &STI) {
+                                             unsigned ComponentCount,
+                                             const GCNSubtarget &STI) {
   if (ComponentCount > 4)
     return 0;

@@ -773,7 +781,8 @@
     if (Info0->BitsPerComp != 32)
       return false;

-    if (getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, STI) == 0)
+    if (getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, STI) ==
+        0)
       return false;
   }

@@ -1044,8 +1053,8 @@
                         : AMDGPU::DS_READ2ST64_B64_gfx9;
 }

-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
+    CombineInfo &CI, CombineInfo &Paired,
     const SmallVectorImpl<MachineInstr *> &InstsToMove) {
   MachineBasicBlock *MBB = CI.I->getParent();

@@ -1143,9 +1152,9 @@
                          : AMDGPU::DS_WRITE2ST64_B64_gfx9;
 }

-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
-    const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
+    CombineInfo &CI, CombineInfo &Paired,
+    const SmallVectorImpl<MachineInstr *> &InstsToMove) {
   MachineBasicBlock *MBB = CI.I->getParent();

   // Be sure to use .addOperand(), and not .addReg() with these. We want to be
@@ -1211,9 +1220,9 @@
   return Write2;
 }

-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
-    const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeImagePair(
+    CombineInfo &CI, CombineInfo &Paired,
+    const SmallVectorImpl<MachineInstr *> &InstsToMove) {
   MachineBasicBlock *MBB = CI.I->getParent();
   DebugLoc DL = CI.I->getDebugLoc();
   const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1241,7 +1250,8 @@
   const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
   const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();

-  MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+  MachineInstr *New = MIB.addMemOperand(
+      combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

   unsigned SubRegIdx0, SubRegIdx1;
   std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired);
@@ -1286,11 +1296,12 @@
   const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();

   MachineInstr *New =
-    BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg)
-        .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
-        .addImm(MergedOffset) // offset
-        .addImm(CI.CPol)      // cpol
-        .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+      BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg)
+          .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
+          .addImm(MergedOffset) // offset
+          .addImm(CI.CPol)      // cpol
+          .addMemOperand(
+              combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
   const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1345,13 +1356,14 @@
   const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();

   MachineInstr *New =
-    MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
-        .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
-        .addImm(MergedOffset) // offset
-        .addImm(CI.CPol)      // cpol
-        .addImm(0)            // tfe
-        .addImm(0)            // swz
-        .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+      MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
+          .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
+          .addImm(MergedOffset) // offset
+          .addImm(CI.CPol)      // cpol
+          .addImm(0)            // tfe
+          .addImm(0)            // swz
+          .addMemOperand(
+              combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
   const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1490,10 +1502,10 @@
       MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
           .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
           .addImm(std::min(CI.Offset, Paired.Offset)) // offset
-          .addImm(JoinedFormat)                     // format
-          .addImm(CI.CPol)                          // cpol
-          .addImm(0)                                // tfe
-          .addImm(0)                                // swz
+          .addImm(JoinedFormat)                       // format
+          .addImm(CI.CPol)                            // cpol
+          .addImm(0)                                  // tfe
+          .addImm(0)                                  // swz
           .addMemOperand(
               combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

@@ -1569,7 +1581,8 @@
     }
   } else {
     static const unsigned Idxs[4][4] = {
-        {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
+        {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2,
+         AMDGPU::sub0_sub1_sub2_sub3},
         {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0},
         {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0},
         {AMDGPU::sub3, 0, 0, 0},
@@ -1647,7 +1660,6 @@
   if (Regs.VAddr)
     MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));

-
   // It shouldn't be possible to get this far if the two instructions
   // don't have a single memoperand, because MachineInstr::mayAlias()
   // will return true if this is the case.
@@ -1657,13 +1669,14 @@
   const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();

   MachineInstr *New =
-    MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
-        .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
-        .addImm(std::min(CI.Offset, Paired.Offset)) // offset
-        .addImm(CI.CPol) // cpol
-        .addImm(0) // tfe
-        .addImm(0) // swz
-        .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+      MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
+          .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
+          .addImm(std::min(CI.Offset, Paired.Offset)) // offset
+          .addImm(CI.CPol)                            // cpol
+          .addImm(0)                                  // tfe
+          .addImm(0)                                  // swz
+          .addMemOperand(
+              combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

   moveInstsAfter(MIB, InstsToMove);

@@ -1672,17 +1685,17 @@
   return New;
 }

-MachineOperand
-SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) const {
+MachineOperand SILoadStoreOptimizer::createRegOrImm(int32_t Val,
+                                                    MachineInstr &MI) const {
   APInt V(32, Val, true);
   if (TII->isInlineConstant(V))
     return MachineOperand::CreateImm(Val);

   Register Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
   MachineInstr *Mov =
-  BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
-          TII->get(AMDGPU::S_MOV_B32), Reg)
-    .addImm(Val);
+      BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+              TII->get(AMDGPU::S_MOV_B32), Reg)
+          .addImm(Val);
   (void)Mov;
   LLVM_DEBUG(dbgs() << "  "; Mov->dump());
   return MachineOperand::CreateReg(Reg, false);
@@ -1704,9 +1717,10 @@
          "Expected 32-bit Base-Register-Hi!!");

   LLVM_DEBUG(dbgs() << "  Re-Computed Anchor-Base:\n");
-  MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
+  MachineOperand OffsetLo =
+      createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
   MachineOperand OffsetHi =
-    createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
+      createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);

   const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
   Register CarryReg = MRI->createVirtualRegister(CarryRC);
@@ -1715,31 +1729,31 @@
   Register DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   Register DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   MachineInstr *LoHalf =
-    BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_CO_U32_e64), DestSub0)
-      .addReg(CarryReg, RegState::Define)
-      .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
-      .add(OffsetLo)
-      .addImm(0); // clamp bit
+      BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_CO_U32_e64), DestSub0)
+          .addReg(CarryReg, RegState::Define)
+          .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
+          .add(OffsetLo)
+          .addImm(0); // clamp bit
   (void)LoHalf;
   LLVM_DEBUG(dbgs() << "  "; LoHalf->dump(););

   MachineInstr *HiHalf =
-    BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADDC_U32_e64), DestSub1)
-      .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
-      .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
-      .add(OffsetHi)
-      .addReg(CarryReg, RegState::Kill)
-      .addImm(0); // clamp bit
+      BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADDC_U32_e64), DestSub1)
+          .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
+          .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
+          .add(OffsetHi)
+          .addReg(CarryReg, RegState::Kill)
+          .addImm(0); // clamp bit
   (void)HiHalf;
   LLVM_DEBUG(dbgs() << "  "; HiHalf->dump(););

   Register FullDestReg = MRI->createVirtualRegister(TRI->getVGPR64Class());
   MachineInstr *FullBase =
-    BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg)
-      .addReg(DestSub0)
-      .addImm(AMDGPU::sub0)
-      .addReg(DestSub1)
-      .addImm(AMDGPU::sub1);
+      BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+          .addReg(DestSub0)
+          .addImm(AMDGPU::sub0)
+          .addReg(DestSub1)
+          .addImm(AMDGPU::sub1);
   (void)FullBase;
   LLVM_DEBUG(dbgs() << "  "; FullBase->dump(); dbgs() << "\n";);

@@ -1782,14 +1796,14 @@
 // %HI:vgpr_32, = V_ADDC_U32_e64 %BASE_HI:vgpr_32, 0, killed %c:sreg_64_xexec
 // %Base:vreg_64 =
 // REG_SEQUENCE %LO:vgpr_32, %subreg.sub0, %HI:vgpr_32, %subreg.sub1
-void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base,
-                                                      MemAddress &Addr) const {
+void SILoadStoreOptimizer::processBaseWithConstOffset(
+    const MachineOperand &Base, MemAddress &Addr) const {
   if (!Base.isReg())
     return;

   MachineInstr *Def = MRI->getUniqueVRegDef(Base.getReg());
-  if (!Def || Def->getOpcode() != AMDGPU::REG_SEQUENCE
-      || Def->getNumOperands() != 5)
+  if (!Def || Def->getOpcode() != AMDGPU::REG_SEQUENCE ||
+      Def->getNumOperands() != 5)
     return;

   MachineOperand BaseLo = Def->getOperand(1);
@@ -1836,8 +1850,7 @@
 }

 bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
-    MachineInstr &MI,
-    MemInfoMap &Visited,
+    MachineInstr &MI, MemInfoMap &Visited,
     SmallPtrSet<MachineInstr *, 4> &AnchorList) const {
   if (!(MI.mayLoad() ^ MI.mayStore()))
     return false;
@@ -1876,7 +1889,8 @@
   }

   LLVM_DEBUG(dbgs() << "  BASE: {" << MAddr.Base.HiReg << ", "
-                    << MAddr.Base.LoReg << "} Offset: " << MAddr.Offset << "\n\n";);
+                    << MAddr.Base.LoReg << "} Offset: " << MAddr.Offset
+                    << "\n\n";);

   // Step2: Traverse through MI's basic block and find an anchor(that has the
   // same base-registers) with the highest 13bit distance from MI's offset.
@@ -1913,9 +1927,9 @@
   MachineBasicBlock::iterator MBBI = MI.getIterator();
   ++MBBI;
   const SITargetLowering *TLI =
-    static_cast<const SITargetLowering *>(STM->getTargetLowering());
+      static_cast<const SITargetLowering *>(STM->getTargetLowering());

-  for ( ; MBBI != E; ++MBBI) {
+  for (; MBBI != E; ++MBBI) {
     MachineInstr &MINext = *MBBI;
     // TODO: Support finding an anchor(with same base) from store addresses or
     // any other load addresses where the opcodes are different.
@@ -1924,7 +1938,7 @@
       continue;

     const MachineOperand &BaseNext =
-      *TII->getNamedOperand(MINext, AMDGPU::OpName::vaddr);
+        *TII->getNamedOperand(MINext, AMDGPU::OpName::vaddr);
     MemAddress MAddrNext;
     if (Visited.find(&MINext) == Visited.end()) {
       processBaseWithConstOffset(BaseNext, MAddrNext);
@@ -1956,8 +1970,8 @@
   if (AnchorInst) {
     LLVM_DEBUG(dbgs() << "  Anchor-Inst(with max-distance from Offset): ";
               AnchorInst->dump());
-    LLVM_DEBUG(dbgs() << "  Anchor-Offset from BASE: "
-               << AnchorAddr.Offset << "\n\n");
+    LLVM_DEBUG(dbgs() << "  Anchor-Offset from BASE: " << AnchorAddr.Offset
+                      << "\n\n");

    // Instead of moving up, just re-compute anchor-instruction's base address.
     Register Base = computeBase(MI, AnchorAddr);
@@ -1971,8 +1985,8 @@
       AM.BaseOffs = P.second - AnchorAddr.Offset;

       if (TLI->isLegalGlobalAddressingMode(AM)) {
-        LLVM_DEBUG(dbgs() << "  Promote Offset(" << P.second;
-                   dbgs() << ")"; P.first->dump());
+        LLVM_DEBUG(dbgs() << "  Promote Offset(" << P.second; dbgs() << ")";
+                   P.first->dump());
         updateBaseAndOffset(*P.first, Base, P.second - AnchorAddr.Offset);
         LLVM_DEBUG(dbgs() << "  After promotion: "; P.first->dump());
       }
@@ -1984,8 +1998,9 @@
   return false;
 }

-void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI,
-                 std::list<std::list<CombineInfo> > &MergeableInsts) const {
+void SILoadStoreOptimizer::addInstToMergeableList(
+    const CombineInfo &CI,
+    std::list<std::list<CombineInfo>> &MergeableInsts) const {
   for (std::list<CombineInfo> &AddrList : MergeableInsts) {
     if (AddrList.front().InstClass == CI.InstClass &&
         AddrList.front().hasSameBaseAddress(*CI.I)) {
@@ -2005,14 +2020,15 @@
     std::list<std::list<CombineInfo>> &MergeableInsts) const {
   bool Modified = false;

-  // Sort potential mergeable instructions into lists. One list per base address.
+  // Sort potential mergeable instructions into lists. One list per base
+  // address.
   unsigned Order = 0;
   MachineBasicBlock::iterator BlockI = Begin;
   for (; BlockI != End; ++BlockI) {
     MachineInstr &MI = *BlockI;

-    // We run this before checking if an address is mergeable, because it can produce
-    // better code even if the instructions aren't mergeable.
+    // We run this before checking if an address is mergeable, because it can
+    // produce better code even if the instructions aren't mergeable.
     if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
       Modified = true;

@@ -2045,12 +2061,13 @@
   // At this point we have lists of Mergeable instructions.
   //
   // Part 2: Sort lists by offset and then for each CombineInfo object in the
-  // list try to find an instruction that can be merged with I. If an instruction
-  // is found, it is stored in the Paired field. If no instructions are found, then
-  // the CombineInfo object is deleted from the list.
+  // list try to find an instruction that can be merged with I. If an
+  // instruction is found, it is stored in the Paired field. If no instructions
+  // are found, then the CombineInfo object is deleted from the list.
   for (std::list<std::list<CombineInfo>>::iterator I = MergeableInsts.begin(),
-       E = MergeableInsts.end(); I != E;) {
+                                                   E = MergeableInsts.end();
+       I != E;) {
     std::list<CombineInfo> &MergeList = *I;

     if (MergeList.size() <= 1) {
@@ -2064,10 +2081,9 @@
     // Sort the lists by offsets, this way mergeable instructions will be
     // adjacent to each other in the list, which will make it easier to find
     // matches.
-    MergeList.sort(
-        [] (const CombineInfo &A, CombineInfo &B) {
-          return A.Offset < B.Offset;
-        });
+    MergeList.sort([](CombineInfo const &A, CombineInfo const &B) {
+      return A.Offset < B.Offset;
+    });
     ++I;
   }

@@ -2078,11 +2094,12 @@
 // the same base register. We rely on the scheduler to do the hard work of
 // clustering nearby loads, and assume these are all adjacent.
 bool SILoadStoreOptimizer::optimizeBlock(
-                       std::list<std::list<CombineInfo> > &MergeableInsts) {
+    std::list<std::list<CombineInfo>> &MergeableInsts) {
   bool Modified = false;

   for (std::list<std::list<CombineInfo>>::iterator I = MergeableInsts.begin(),
-       E = MergeableInsts.end(); I != E;) {
+                                                   E = MergeableInsts.end();
+       I != E;) {
     std::list<CombineInfo> &MergeList = *I;

     bool OptimizeListAgain = false;
@@ -2107,10 +2124,8 @@
   return Modified;
 }

-bool
-SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
-    std::list<CombineInfo> &MergeList,
-    bool &OptimizeListAgain) {
+bool SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
+    std::list<CombineInfo> &MergeList, bool &OptimizeListAgain) {
   if (MergeList.empty())
     return false;