Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -28,6 +28,7 @@
 class MachineInstr;
 class MachineOperand;
 class GISelKnownBits;
+class MachineDominatorTree;
 
 struct PreferredTuple {
   LLT Ty; // The result type of the extend.
@@ -41,10 +42,12 @@
   MachineRegisterInfo &MRI;
   GISelChangeObserver &Observer;
   GISelKnownBits *KB;
+  MachineDominatorTree *MDT;
 
 public:
   CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
-                 GISelKnownBits *KB = nullptr);
+                 GISelKnownBits *KB = nullptr,
+                 MachineDominatorTree *MDT = nullptr);
 
   /// MachineRegisterInfo::replaceRegWith() and inform the observer of the
   /// changes.
   void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                       Register ToReg) const;
@@ -60,17 +63,61 @@
   bool matchCombineCopy(MachineInstr &MI);
   void applyCombineCopy(MachineInstr &MI);
 
+  /// \returns true if \p DefMI precedes \p UseMI or they are the same
+  /// instruction. Both must be in the same basic block.
+  bool isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI);
+
+  /// \returns true if \p DefMI dominates \p UseMI. By definition an
+  /// instruction dominates itself.
+  ///
+  /// If we haven't been provided with a MachineDominatorTree during
+  /// construction, this function returns a conservative result that only
+  /// tracks a single basic block.
+  bool dominates(MachineInstr &DefMI, MachineInstr &UseMI);
+
   /// If \p MI is extend that consumes the result of a load, try to combine it.
   /// Returns true if MI changed.
   bool tryCombineExtendingLoads(MachineInstr &MI);
   bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
   void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
 
+  /// Combine \p MI into a pre-indexed or post-indexed load/store operation if
+  /// legal and the surrounding code makes it useful.
+  ///
+  /// For example (pre-indexed):
+  ///
+  ///     $addr = G_GEP $base, $offset
+  ///     [...]
+  ///     $val = G_LOAD $addr
+  ///     [...]
+  ///     $whatever = COPY $addr
+  ///
+  /// -->
+  ///
+  ///     $val, $addr = G_INDEXED_LOAD $base, $offset, 1 (IsPre)
+  ///     [...]
+  ///     $whatever = COPY $addr
+  ///
+  /// or (post-indexed):
+  ///
+  ///     G_STORE $val, $base
+  ///     [...]
+  ///     $addr = G_GEP $base, $offset
+  ///     [...]
+  ///     $whatever = COPY $addr
+  ///
+  /// -->
+  ///
+  ///     $addr = G_INDEXED_STORE $val, $base, $offset, 0 (IsPre)
+  ///     [...]
+  ///     $whatever = COPY $addr
+  bool tryCombineIndexedLoadStore(MachineInstr &MI);
+
   bool matchCombineBr(MachineInstr &MI);
   bool tryCombineBr(MachineInstr &MI);
 
   /// Optimize memcpy intrinsics et al, e.g. constant len calls.
   /// \p MaxLen if non-zero specifies the max length of a mem libcall to inline.
   bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
 
   /// Try to transform \p MI by using all of the above
@@ -87,6 +134,20 @@
                       bool IsVolatile);
   bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
                       unsigned KnownLen, unsigned DstAlign, bool IsVolatile);
+
+  /// Given a non-indexed load or store instruction \p MI, find an offset that
+  /// can be usefully and legally folded into it as a post-indexing operation.
+  ///
+  /// \returns true if a candidate is found.
+  bool findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
+                              Register &Offset);
+
+  /// Given a non-indexed load or store instruction \p MI, find an offset that
+  /// can be usefully and legally folded into it as a pre-indexing operation.
+  ///
+  /// \returns true if a candidate is found.
+  bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
+                             Register &Offset);
 };
 
 } // namespace llvm
Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2983,6 +2983,14 @@
     return false;
   }
 
+  /// Returns true if the specified base+offset is a legal indexed addressing
+  /// mode for this target. \p MI is the load or store instruction that is
+  /// being considered for transformation.
+  virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
+                               bool IsPre, MachineRegisterInfo &MRI) const {
+    return false;
+  }
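+
+  // Illustrative only: no in-tree target overrides this hook yet, which is
+  // why the new combiner test relies on -force-legal-indexing. A hypothetical
+  // override would typically just check that the offset is a constant within
+  // the immediate range of the target's pre/post-indexed instructions, along
+  // the lines of (sketch, not part of this change):
+  //
+  //   bool MyTargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
+  //                                          Register Offset, bool IsPre,
+  //                                          MachineRegisterInfo &MRI) const {
+  //     auto Cst = getConstantVRegVal(Offset, MRI); // constant offsets only
+  //     return Cst && isLegalAddImmediate(*Cst);    // hypothetical range check
+  //   }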
+
   /// Return the entry encoding for a jump table in the current function. The
   /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
   virtual unsigned getJumpTableEncoding() const;
Index: llvm/include/llvm/Support/TargetOpcodes.def
===================================================================
--- llvm/include/llvm/Support/TargetOpcodes.def
+++ llvm/include/llvm/Support/TargetOpcodes.def
@@ -294,9 +294,21 @@
 /// Generic zeroext load
 HANDLE_TARGET_OPCODE(G_ZEXTLOAD)
 
+/// Generic indexed load (including anyext load)
+HANDLE_TARGET_OPCODE(G_INDEXED_LOAD)
+
+/// Generic indexed signext load
+HANDLE_TARGET_OPCODE(G_INDEXED_SEXTLOAD)
+
+/// Generic indexed zeroext load
+HANDLE_TARGET_OPCODE(G_INDEXED_ZEXTLOAD)
+
 /// Generic store.
 HANDLE_TARGET_OPCODE(G_STORE)
 
+/// Generic indexed store.
+HANDLE_TARGET_OPCODE(G_INDEXED_STORE)
+
 /// Generic atomic cmpxchg with internal success check.
 HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
 
Index: llvm/include/llvm/Target/GenericOpcodes.td
===================================================================
--- llvm/include/llvm/Target/GenericOpcodes.td
+++ llvm/include/llvm/Target/GenericOpcodes.td
@@ -767,6 +767,32 @@
   let mayLoad = 1;
 }
 
+// Generic indexed load. Combines a GEP with a load. $newaddr is set to
+// $base + $offset. If $am is 0 (post-indexed), then the value is loaded from
+// $base; if $am is 1 (pre-indexed) then the value is loaded from $newaddr.
+def G_INDEXED_LOAD : GenericInstruction {
+  let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+  let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+  let hasSideEffects = 0;
+  let mayLoad = 1;
+}
+
+// Same as G_INDEXED_LOAD except that the load performed is sign-extending,
+// as with G_SEXTLOAD.
+def G_INDEXED_SEXTLOAD : GenericInstruction {
+  let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+  let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+  let hasSideEffects = 0;
+  let mayLoad = 1;
+}
+
+// Same as G_INDEXED_LOAD except that the load performed is zero-extending,
+// as with G_ZEXTLOAD.
+def G_INDEXED_ZEXTLOAD : GenericInstruction {
+  let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+  let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+  let hasSideEffects = 0;
+  let mayLoad = 1;
+}
+
 // Generic store. Expects a MachineMemOperand in addition to explicit operands.
 def G_STORE : GenericInstruction {
   let OutOperandList = (outs);
@@ -775,6 +801,15 @@
   let mayStore = 1;
 }
 
+// Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing
+// behaviour.
+def G_INDEXED_STORE : GenericInstruction {
+  let OutOperandList = (outs ptype0:$newaddr);
+  let InOperandList = (ins type1:$src, ptype0:$base, ptype2:$offset,
+                       unknown:$am);
+  let hasSideEffects = 0;
+  let mayStore = 1;
+}
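+
+// As an illustration (not taken from an in-tree test), a post-indexed byte
+// load and a pre-indexed byte store would look roughly like:
+//   %val:_(s8), %next:_(p0) = G_INDEXED_LOAD %base(p0), %off(s64), 0
+//   %next:_(p0) = G_INDEXED_STORE %val(s8), %base(p0), %off(s64), 1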
+
 // Generic atomic cmpxchg with internal success check. Expects a
 // MachineMemOperand in addition to explicit operands.
 def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction {
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -11,6 +11,7 @@
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -22,10 +23,19 @@
 
 using namespace llvm;
 
+// Option to allow testing of the combiner while no targets know about indexed
+// addressing.
+static cl::opt<bool>
+    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
+                       cl::desc("Force all indexed operations to be "
+                                "legal for the GlobalISel combiner"));
+
 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
-                               MachineIRBuilder &B, GISelKnownBits *KB)
+                               MachineIRBuilder &B, GISelKnownBits *KB,
+                               MachineDominatorTree *MDT)
     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
-      KB(KB) {
+      KB(KB), MDT(MDT) {
   (void)this->KB;
 }
 
@@ -349,6 +359,204 @@
   Observer.changedInstr(MI);
 }
 
+bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
+  assert(DefMI.getParent() == UseMI.getParent());
+
+  // Loop through the basic block until we find one of the instructions;
+  // whichever we reach first comes earlier. If DefMI and UseMI are the same
+  // instruction this returns true, matching the documented behaviour.
+  for (MachineInstr &CurMI : *DefMI.getParent()) {
+    if (&CurMI == &DefMI)
+      return true;
+    if (&CurMI == &UseMI)
+      return false;
+  }
+
+  llvm_unreachable("Block must contain both DefMI and UseMI");
+}
+
+bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
+  if (MDT)
+    return MDT->dominates(&DefMI, &UseMI);
+  if (DefMI.getParent() != UseMI.getParent())
+    return false;
+
+  return isPredecessor(DefMI, UseMI);
+}
+
+bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
+                                            Register &Base, Register &Offset) {
+  auto &MF = *MI.getParent()->getParent();
+  const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+  unsigned Opcode = MI.getOpcode();
+  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+
+  Base = MI.getOperand(1).getReg();
+  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
+  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
+
+  for (auto &Use : MRI.use_instructions(Base)) {
+    if (Use.getOpcode() != TargetOpcode::G_GEP)
+      continue;
+
+    Offset = Use.getOperand(2).getReg();
+    if (!ForceLegalIndexing &&
+        !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
+      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
+                        << Use);
+      continue;
+    }
+
+    // Make sure the offset calculation is before the potentially indexed op.
+    // FIXME: we really care about dependency here. The offset calculation
+    // might be movable.
+    MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
+    if (!OffsetDef || !dominates(*OffsetDef, MI)) {
+      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
+                        << Use);
+      continue;
+    }
+
+    // FIXME: check whether all uses of Base are load/store with foldable
+    // addressing modes. If so, using the normal addr-modes is better than
+    // forming an indexed one.
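+
+    // The GEP will be removed and its result re-defined by the indexed
+    // memory operation, so every user of the GEP's result must itself be
+    // dominated by the memory operation.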
+    bool MemOpDominatesAddrUses = true;
+    for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
+      if (!dominates(MI, GEPUse)) {
+        MemOpDominatesAddrUses = false;
+        break;
+      }
+    }
+
+    if (!MemOpDominatesAddrUses) {
+      LLVM_DEBUG(
+          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
+                 << Use);
+      continue;
+    }
+
+    LLVM_DEBUG(dbgs() << "    Found match: " << Use);
+    Addr = Use.getOperand(0).getReg();
+    return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
+                                           Register &Base, Register &Offset) {
+  auto &MF = *MI.getParent()->getParent();
+  const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+  unsigned Opcode = MI.getOpcode();
+  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+
+  Addr = MI.getOperand(1).getReg();
+  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
+  if (!AddrDef || MRI.hasOneUse(Addr))
+    return false;
+
+  Base = AddrDef->getOperand(1).getReg();
+  Offset = AddrDef->getOperand(2).getReg();
+
+  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
+
+  if (!ForceLegalIndexing &&
+      !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
+    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
+    return false;
+  }
+
+  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
+  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
+    return false;
+  }
+
+  if (MI.getOpcode() == TargetOpcode::G_STORE) {
+    // Would require a copy.
+    if (Base == MI.getOperand(0).getReg()) {
+      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
+      return false;
+    }
+
+    // We're expecting one use of Addr in MI, but it could also be the
+    // value stored, which isn't actually dominated by the instruction.
+    if (MI.getOperand(0).getReg() == Addr) {
+      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
+      return false;
+    }
+  }
+
+  // FIXME: check whether all uses of the base pointer are constant GEPs. That
+  // might allow us to end base's liveness here by adjusting the constant.
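+
+  // After the transform Addr will be defined by the indexed load/store
+  // itself, so every existing user of Addr must be dominated by the memory
+  // operation.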
+  for (auto &UseMI : MRI.use_instructions(Addr)) {
+    if (!dominates(MI, UseMI)) {
+      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
+  unsigned Opcode = MI.getOpcode();
+  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
+      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
+    return false;
+
+  bool IsStore = Opcode == TargetOpcode::G_STORE;
+  Register Addr, Base, Offset;
+  bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
+  if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
+    return false;
+
+  unsigned NewOpcode;
+  switch (Opcode) {
+  case TargetOpcode::G_LOAD:
+    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
+    break;
+  case TargetOpcode::G_SEXTLOAD:
+    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
+    break;
+  case TargetOpcode::G_ZEXTLOAD:
+    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
+    break;
+  case TargetOpcode::G_STORE:
+    NewOpcode = TargetOpcode::G_INDEXED_STORE;
    break;
+  default:
+    llvm_unreachable("Unknown load/store opcode");
+  }
+
+  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
+  MachineIRBuilder MIRBuilder(MI);
+  auto MIB = MIRBuilder.buildInstr(NewOpcode);
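+
+  // The new instruction's defs are the loaded value followed by the updated
+  // address (for loads), or just the updated address (for stores, where the
+  // stored value becomes the first use). The remaining uses are the base
+  // pointer, the offset, and an immediate that is 1 for pre-indexing and 0
+  // for post-indexing.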
+  if (IsStore) {
+    MIB.addDef(Addr);
+    MIB.addUse(MI.getOperand(0).getReg());
+  } else {
+    MIB.addDef(MI.getOperand(0).getReg());
+    MIB.addDef(Addr);
+  }
+
+  MIB.addUse(Base);
+  MIB.addUse(Offset);
+  MIB.addImm(IsPre);
+  MI.eraseFromParent();
+  AddrDef.eraseFromParent();
+
+  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
+  return true;
+}
+
 bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
   // Try to match the following:
@@ -919,5 +1127,9 @@
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
-  return tryCombineExtendingLoads(MI);
+  if (tryCombineExtendingLoads(MI))
+    return true;
+  if (tryCombineIndexedLoadStore(MI))
+    return true;
+  return false;
 }
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -36,7 +36,7 @@
 
 /// NOTE: The TargetMachine owns TLOF.
 TargetLowering::TargetLowering(const TargetMachine &tm)
-  : TargetLoweringBase(tm) {}
+  : TargetLoweringBase(tm) {}
 
 const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
   return nullptr;
Index: llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
@@ -29,13 +30,14 @@
 namespace {
 class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
   GISelKnownBits *KB;
+  MachineDominatorTree *MDT;
 
 public:
   AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
-                                  GISelKnownBits *KB)
+                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
       : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                      /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
-        KB(KB) {}
+        KB(KB), MDT(MDT) {}
   virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
                        MachineIRBuilder &B) const override;
 };
@@ -43,7 +45,7 @@
 bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                               MachineInstr &MI,
                                               MachineIRBuilder &B) const {
-  CombinerHelper Helper(Observer, B, KB);
+  CombinerHelper Helper(Observer, B, KB, MDT);
 
   switch (MI.getOpcode()) {
   default:
@@ -54,8 +56,14 @@
     return Helper.tryCombineBr(MI);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_SEXTLOAD:
-  case TargetOpcode::G_ZEXTLOAD:
-    return Helper.tryCombineExtendingLoads(MI);
+  case TargetOpcode::G_ZEXTLOAD: {
+    bool Changed = false;
+    Changed |= Helper.tryCombineExtendingLoads(MI);
+    Changed |= Helper.tryCombineIndexedLoadStore(MI);
+    return Changed;
+  }
+  case TargetOpcode::G_STORE:
+    return Helper.tryCombineIndexedLoadStore(MI);
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
     switch (MI.getIntrinsicID()) {
     case Intrinsic::memcpy:
@@ -99,6 +107,8 @@
   getSelectionDAGFallbackAnalysisUsage(AU);
   AU.addRequired<GISelKnownBitsAnalysis>();
   AU.addPreserved<GISelKnownBitsAnalysis>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
@@ -115,8 +125,9 @@
   bool EnableOpt =
       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+  MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
   AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
-                                         F.hasMinSize(), KB);
+                                         F.hasMinSize(), KB, MDT);
   Combiner C(PCInfo, TPC);
   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
 }
Index: llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-prelegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s
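+; Note: -force-legal-indexing is used because no target reports indexed
+; addressing as legal via isIndexingLegal yet; the flag makes the combiner
+; treat every base/offset pair as legal so the transform itself can be tested.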
+
+define i8* @test_simple_load_pre(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_load_pre
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1
+; CHECK: $x0 = COPY [[NEXT]](p0)
+
+  %next = getelementptr i8, i8* %ptr, i32 42
+  load volatile i8, i8* %next
+  ret i8* %next
+}
+
+define void @test_load_multiple_dominated(i8* %ptr, i1 %tst, i1 %tst2) {
+; CHECK-LABEL: name: test_load_multiple_dominated
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1
+; CHECK: $x0 = COPY [[NEXT]](p0)
+  %next = getelementptr i8, i8* %ptr, i32 42
+  br i1 %tst, label %do_load, label %end
+
+do_load:
+  load volatile i8, i8* %next
+  br i1 %tst2, label %bb1, label %bb2
+
+bb1:
+  store volatile i8* %next, i8** undef
+  ret void
+
+bb2:
+  call void @bar(i8* %next)
+  ret void
+
+end:
+  ret void
+}
+
+define i8* @test_simple_store_pre(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_store_pre
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[VAL:%.*]]:_(s8) = G_CONSTANT i8 0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: [[NEXT:%.*]]:_(p0) = G_INDEXED_STORE [[VAL]](s8), [[BASE]], [[OFFSET]](s64), 1
+; CHECK: $x0 = COPY [[NEXT]](p0)
+
+  %next = getelementptr i8, i8* %ptr, i32 42
+  store volatile i8 0, i8* %next
+  ret i8* %next
+}
+
+; The potentially pre-indexed address is used as the value stored. Converting
+; would produce the value too late but only by one instruction.
+define i64** @test_store_pre_val_loop(i64** %ptr) {
+; CHECK-LABEL: name: test_store_pre_val_loop
+; CHECK: G_GEP
+; CHECK: G_STORE %
+
+  %next = getelementptr i64*, i64** %ptr, i32 42
+  %next.p0 = bitcast i64** %next to i64*
+  store volatile i64* %next.p0, i64** %next
+  ret i64** %next
+}
+
+; Potentially pre-indexed address is used between GEP computing it and load.
+define i8* @test_load_pre_before(i8* %ptr) {
+; CHECK-LABEL: name: test_load_pre_before
+; CHECK: G_GEP
+; CHECK: BL @bar
+; CHECK: G_LOAD %
+
+  %next = getelementptr i8, i8* %ptr, i32 42
+  call void @bar(i8* %next)
+  load volatile i8, i8* %next
+  ret i8* %next
+}
+
+; Materializing the base into a writable register (from sp/fp) would be just as
+; bad as the original GEP.
+define i8* @test_alloca_load_pre() {
+; CHECK-LABEL: name: test_alloca_load_pre
+; CHECK: G_GEP
+; CHECK: G_LOAD %
+
+  %ptr = alloca i8, i32 128
+  %next = getelementptr i8, i8* %ptr, i32 42
+  load volatile i8, i8* %next
+  ret i8* %next
+}
+
+; Load does not dominate use of its address. No indexing.
+define i8* @test_pre_nodom(i8* %in, i1 %tst) {
+; CHECK-LABEL: name: test_pre_nodom
+; CHECK: G_GEP
+; CHECK: G_LOAD %
+
+  %next = getelementptr i8, i8* %in, i32 16
+  br i1 %tst, label %do_indexed, label %use_addr
+
+do_indexed:
+  %val = load i8, i8* %next
+  store i8 %val, i8* @var
+  store i8* %next, i8** @varp8
+  br label %use_addr
+
+use_addr:
+  ret i8* %next
+}
+
+define i8* @test_simple_load_post(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_load_post
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0
+; CHECK: $x0 = COPY [[NEXT]](p0)
+
+  %next = getelementptr i8, i8* %ptr, i32 42
+  load volatile i8, i8* %ptr
+  ret i8* %next
+}
+
+define i8* @test_simple_load_post_gep_after(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_load_post_gep_after
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: BL @get_offset
+; CHECK: [[OFFSET:%.*]]:_(s64) = COPY $x0
+; CHECK: {{%.*}}:_(s8), [[ADDR:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0
+; CHECK: $x0 = COPY [[ADDR]](p0)
+
+  %offset = call i64 @get_offset()
+  load volatile i8, i8* %ptr
+  %next = getelementptr i8, i8* %ptr, i64 %offset
+  ret i8* %next
+}
+
+define i8* @test_load_post_keep_looking(i8* %ptr) {
+; CHECK: name: test_load_post_keep_looking
+; CHECK: G_INDEXED_LOAD
+
+  %offset = call i64 @get_offset()
+  load volatile i8, i8* %ptr
+  %intval = ptrtoint i8* %ptr to i8
+  store i8 %intval, i8* @var
+
+  %next = getelementptr i8, i8* %ptr, i64 %offset
+  ret i8* %next
+}
+
+; Base is frame index. Using indexing would need copy anyway.
+define i8* @test_load_post_alloca() {
+; CHECK-LABEL: name: test_load_post_alloca
+; CHECK: G_GEP
+; CHECK: G_LOAD %
+
+  %ptr = alloca i8, i32 128
+  %next = getelementptr i8, i8* %ptr, i32 42
+  load volatile i8, i8* %ptr
+  ret i8* %next
+}
+
+; Offset computation does not dominate the load we might be indexing.
+define i8* @test_load_post_gep_offset_after(i8* %ptr) {
+; CHECK-LABEL: name: test_load_post_gep_offset_after
+; CHECK: G_LOAD %
+; CHECK: BL @get_offset
+; CHECK: G_GEP
+
+  load volatile i8, i8* %ptr
+  %offset = call i64 @get_offset()
+  %next = getelementptr i8, i8* %ptr, i64 %offset
+  ret i8* %next
+}
+
+declare void @bar(i8*)
+declare i64 @get_offset()
+@var = global i8 0
+@varp8 = global i8* null
Index: llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
@@ -44,6 +44,7 @@
 ; ENABLED: IRTranslator
 ; VERIFY-NEXT: Verify generated machine code
 ; ENABLED-NEXT: Analysis for ComputingKnownBits
+; ENABLED-NEXT: MachineDominator Tree Construction
 ; ENABLED-NEXT: PreLegalizerCombiner
 ; VERIFY-NEXT: Verify generated machine code
 ; ENABLED-NEXT: Analysis containing CSE Info
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -134,9 +134,21 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_INDEXED_LOAD (opcode 64): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_INDEXED_SEXTLOAD (opcode 65): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_INDEXED_ZEXTLOAD (opcode 66): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_STORE (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_INDEXED_STORE (opcode 68): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_ATOMIC_CMPXCHG_WITH_SUCCESS (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
Index: llvm/test/CodeGen/AArch64/O0-pipeline.ll
===================================================================
--- llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -36,6 +36,7 @@
 ; CHECK-NEXT: Analysis containing CSE Info
 ; CHECK-NEXT: IRTranslator
 ; CHECK-NEXT: Analysis for ComputingKnownBits
+; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: AArch64PreLegalizerCombiner
 ; CHECK-NEXT: Analysis containing CSE Info
 ; CHECK-NEXT: Legalizer