Index: lib/Transforms/Scalar/GVN.cpp
===================================================================
--- lib/Transforms/Scalar/GVN.cpp
+++ lib/Transforms/Scalar/GVN.cpp
@@ -696,6 +696,8 @@
 
     // Helper fuctions of redundant load elimination 
     bool processLoad(LoadInst *L);
+    bool loadGEPSelect(LoadInst *LI, MemDepResult Dep);
+
     bool processNonLocalLoad(LoadInst *L);
     void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, 
                                  AvailValInBlkVect &ValuesPerBlock,
@@ -1817,6 +1819,127 @@
   I->replaceAllUsesWith(Repl);
 }
 
+// Helper function for loadGEPSelect().  Look for the following pattern:
+//
+// (load (gep ptr idx0 idx1 idx2 ))
+//
+// Only match above patterns which satisfy the following additional
+// constraints:
+//   - The inbounds attribute must match the inbounds attribute
+//     of the base GEP
+//   - The number of indicies must match.
+//   - The operands, including the pointer operand, only differ by one operand,
+//     which is the operand fed by a select.
+//
+static bool isNearMatchingLoadGEP(Value *Inst, GetElementPtrInst *BaseGEP,
+                                  unsigned SIIdx) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+    Value *Ptr = LI->getPointerOperand();
+
+    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
+      if (GEPI->isInBounds() != BaseGEP->isInBounds() ||
+          GEPI->getNumIndices() != BaseGEP->getNumIndices())
+        return false;
+
+      // Compute the number of matching indices.
+      unsigned NumMatch = 0;
+      unsigned NumOps = GEPI->getNumOperands();
+      for (unsigned i = 0; i != NumOps; ++i, ++NumMatch)
+        if (GEPI->getOperand(i) != BaseGEP->getOperand(i))
+          if (i != SIIdx)
+            break;
+
+      return NumMatch == NumOps;
+    }
+  }
+  return false;
+}
+
+// Do the following transformation:
+//
+// a = (load (gep ptr_op idx idx_a))
+// b = (load (gep ptr_op idx idx_b))
+// c = (load (gep ptr_op idx (select (cond, idx_a, idx_b))))
+//   -->
+// a = (load (gep ptr_op idx idx_a))
+// b = (load (gep ptr_op idx idx_b))
+// c = (select (cond, a, b))
+//
+bool GVN::loadGEPSelect(LoadInst *LI, MemDepResult Dep) {
+  Value *Op = LI->getOperand(0);
+
+  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op);
+  if (!GEPI)
+    return false;
+
+  // Check whether the GEP is using the result of a select.
+  unsigned SIIdx = 0;
+  SelectInst *SI = nullptr;
+  for (unsigned i = 0, e = GEPI->getNumOperands(); i != e; ++i) {
+    Value *Op0 = GEPI->getOperand(i);
+    if (ZExtInst *Ext = dyn_cast<ZExtInst>(Op0))
+      Op0 = Ext->getOperand(0);
+    if (isa<SelectInst>(Op0)) {
+      SIIdx = i;
+      SI = cast<SelectInst>(Op0);
+      break;
+    }
+  }
+  if (!SI)
+    return false;
+
+  Value *SelectTrueOp = SI->getTrueValue();
+  Value *SelectFalseOp = SI->getFalseValue();
+
+  // Scan from the load until we encounter a dependent instruction or the
+  // beginning of the block.
+  Instruction *DepInst = Dep.getInst();
+  BasicBlock::iterator ScanFrom = LI, ScanEnd;
+  if (DepInst)
+    ScanEnd = DepInst;
+  else
+    ScanEnd = LI->getParent()->begin();
+
+  Value *TrueLoad = nullptr;
+  Value *FalseLoad = nullptr;
+
+  // Scan the basic block backwards looking for loads with nearly matching geps.
+  while (ScanFrom != ScanEnd) {
+    Instruction *Inst = --ScanFrom;
+
+    if (!isNearMatchingLoadGEP(Inst, GEPI, SIIdx))
+      continue;
+
+    Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
+    GetElementPtrInst *GEPI2 = cast<GetElementPtrInst>(Ptr);
+    Value *GEPOp = GEPI2->getOperand(SIIdx);
+
+    // Look through zext.
+    if (ZExtInst *Ext = dyn_cast<ZExtInst>(GEPOp))
+      GEPOp = Ext->getOperand(0);
+
+    // Does the gep op match either of the select ops?
+    if (GEPOp == SelectTrueOp)
+      TrueLoad = Inst;
+    else if (GEPOp == SelectFalseOp)
+      FalseLoad = Inst;
+
+    // All preconditions fulfilled, apply the transformation:
+    if (TrueLoad && FalseLoad) {
+      Value *V = SelectInst::Create(SI->getCondition(), TrueLoad, FalseLoad,
+                                    "ld-gep-sel", LI);
+      LI->replaceAllUsesWith(V);
+
+      if (V->getType()->getScalarType()->isPointerTy())
+        MD->invalidateCachedPointerInfo(V);
+      markInstructionForDeletion(LI);
+      ++NumGVNLoad;
+      return true;
+    }
+  }
+  return false;
+}
+
 /// Attempt to eliminate a load, first by eliminating it
 /// locally, and then attempting non-local elimination if that fails.
 bool GVN::processLoad(LoadInst *L) {
@@ -1910,6 +2033,13 @@
     return false;
   }
 
+  // Attempt to eliminate redundant loads whose addresses are dependent on
+  // the result of a select instruction. If both values have already been
+  // loaded into registers the load becomes redundant and can be replaced
+  // with a select on the two values instead.
+  if (loadGEPSelect(L, Dep))
+    return true;
+
   // If it is defined in another block, try harder.
   if (Dep.isNonLocal())
     return processNonLocalLoad(L);
Index: test/Transforms/GVN/load-gep-select.ll
===================================================================
--- /dev/null
+++ test/Transforms/GVN/load-gep-select.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.s = type { i32 }
+
+@vec = common global i32* null, align 8
+@agg = common global %struct.s** null, align 8
+
+; unsigned *vec;
+;
+; unsigned test1(unsigned i, unsigned max) {
+;   if (vec[i] > vec[i+1])
+;     ++i;
+;   if (vec[i] > max) <- Remove redundant loads
+;     return 1;
+;   return 0;
+; }
+
+; CHECK-LABEL: test1
+; CHECK: load i32, i32* %arrayidx, align 4
+; CHECK: load i32, i32* %arrayidx2, align 4
+; CHECK-NOT: load
+; CHECK: ret i32
+
+define i32 @test1(i32 %i, i32 %max) {
+entry:
+  %idxprom = zext i32 %i to i64
+  %0 = load i32*, i32** @vec, align 8
+  %arrayidx = getelementptr inbounds i32, i32* %0, i64 %idxprom
+  %1 = load i32, i32* %arrayidx, align 4
+  %add = add i32 %i, 1
+  %idxprom1 = zext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom1
+  %2 = load i32, i32* %arrayidx2, align 4
+  %cmp = icmp ugt i32 %1, %2
+  %add.i = select i1 %cmp, i32 %add, i32 %i
+  %idxprom3 = zext i32 %add.i to i64
+  %arrayidx4 = getelementptr inbounds i32, i32* %0, i64 %idxprom3
+  %3 = load i32, i32* %arrayidx4, align 4
+  %cmp5 = icmp ugt i32 %3, %max
+  %retval.0 = zext i1 %cmp5 to i32
+  ret i32 %retval.0
+}
+
+; struct s {
+;   unsigned field;
+; };
+; struct s **agg;
+;
+; unsigned test2(unsigned i, unsigned max) {
+;   if (agg[i]->field > agg[i+1]->field)
+;     ++i;
+;   if (agg[i]->field > max) <- Remove redundant loads
+;     return 1;
+;   return 0;
+; }
+
+; CHECK-LABEL: test2
+; CHECK: load %struct.s*, %struct.s** %arrayidx, align 8
+; CHECK: load i32, i32* %field, align 4
+; CHECK: load %struct.s*, %struct.s** %arrayidx2, align 8
+; CHECK: load i32, i32* %field3, align 4
+; CHECK-NOT: load
+; CHECK-NOT: load
+; CHECK: ret i32
+
+define i32 @test2(i32 %i, i32 %max) {
+entry:
+  %idxprom = zext i32 %i to i64
+  %0 = load %struct.s**, %struct.s*** @agg, align 8
+  %arrayidx = getelementptr inbounds %struct.s*, %struct.s** %0, i64 %idxprom
+  %1 = load %struct.s*, %struct.s** %arrayidx, align 8
+  %field = getelementptr inbounds %struct.s, %struct.s* %1, i64 0, i32 0
+  %2 = load i32, i32* %field, align 4
+  %add = add i32 %i, 1
+  %idxprom1 = zext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds %struct.s*, %struct.s** %0, i64 %idxprom1
+  %3 = load %struct.s*, %struct.s** %arrayidx2, align 8
+  %field3 = getelementptr inbounds %struct.s, %struct.s* %3, i64 0, i32 0
+  %4 = load i32, i32* %field3, align 4
+  %cmp = icmp ugt i32 %2, %4
+  %add.i = select i1 %cmp, i32 %add, i32 %i
+  %idxprom4 = zext i32 %add.i to i64
+  %arrayidx5 = getelementptr inbounds %struct.s*, %struct.s** %0, i64 %idxprom4
+  %5 = load %struct.s*, %struct.s** %arrayidx5, align 8
+  %field6 = getelementptr inbounds %struct.s, %struct.s* %5, i64 0, i32 0
+  %6 = load i32, i32* %field6, align 4
+  %cmp7 = icmp ugt i32 %6, %max
+  %retval.0 = zext i1 %cmp7 to i32
+  ret i32 %retval.0
+}