Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -468,6 +468,179 @@
   return nullptr;
 }
 
+// If we can determine that all possible objects pointed to by the provided
+// pointer value are not only dereferenceable but also definitively less than
+// or equal to the provided maximum size, then return true; otherwise, return
+// false. (Constant global values and allocas are the kinds of objects whose
+// size we can bound this way.)
+static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
+                                     const DataLayout *DL) {
+  SmallPtrSet<Value *, 4> Visited;
+  SmallVector<Value *, 4> Worklist(1, V);
+
+  do {
+    Value *P = Worklist.pop_back_val();
+    P = P->stripPointerCasts();
+
+    if (!Visited.insert(P).second)
+      continue;
+
+    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+      Worklist.push_back(SI->getTrueValue());
+      Worklist.push_back(SI->getFalseValue());
+      continue;
+    }
+
+    if (PHINode *PN = dyn_cast<PHINode>(P)) {
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        Worklist.push_back(PN->getIncomingValue(i));
+      continue;
+    }
+
+    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
+      if (GA->mayBeOverridden())
+        return false;
+      Worklist.push_back(GA->getAliasee());
+      continue;
+    }
+
+    // If we know how big this object is, and it is less than MaxSize, continue
+    // searching. Otherwise, return false.
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
+      if (!AI->getAllocatedType()->isSized())
+        return false;
+
+      ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
+      if (!CS)
+        return false;
+
+      uint64_t TypeSize = DL->getTypeAllocSize(AI->getAllocatedType());
+      // Make sure that, even if the multiplication below would wrap as a
+      // uint64_t, we still do the right thing.
+      if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
+        return false;
+      continue;
+    }
+
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+      if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
+        return false;
+
+      uint64_t InitSize = DL->getTypeAllocSize(GV->getType()->getElementType());
+      if (InitSize > MaxSize)
+        return false;
+      continue;
+    }
+
+    return false;
+  } while (!Worklist.empty());
+
+  return true;
+}
+
+// If we're indexing into an object of a known size, and the outer index is
+// not a constant, but having any value other than zero would lead to
+// undefined behavior, replace it with zero.
+//
+// For example, if we have:
+//   @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+//   ...
+//   %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
+//   ... = load i32* %arrayidx, align 4
+// Then we know that we can replace %x in the GEP with i64 0.
+//
+// FIXME: We could fold any GEP index to zero that would cause UB if it were
+// not zero. Currently, we only handle the first such index. We could also
+// search through non-zero constant indices if we kept track of the offsets
+// those indices implied.
+static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
+                                     Instruction *MemI, unsigned &Idx) {
+  const DataLayout *DL = IC.getDataLayout();
+  if (GEPI->getNumOperands() < 2 || !DL)
+    return false;
+
+  // Find the first non-zero index of a GEP. If all indices are zero, return
+  // one past the last index.
+  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
+    unsigned I = 1;
+    for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
+      Value *V = GEPI->getOperand(I);
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+        if (CI->isZero())
+          continue;
+
+      break;
+    }
+
+    return I;
+  };
+
+  // Skip through initial 'zero' indices, and find the corresponding pointer
+  // type. See if the next index is not a constant.
+  Idx = FirstNZIdx(GEPI);
+  if (Idx == GEPI->getNumOperands())
+    return false;
+  if (isa<Constant>(GEPI->getOperand(Idx)))
+    return false;
+
+  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
+  Type *AllocTy =
+    GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops);
+  if (!AllocTy || !AllocTy->isSized())
+    return false;
+  uint64_t TyAllocSize = DL->getTypeAllocSize(AllocTy);
+
+  // If there are more indices after the one we might replace with a zero, make
+  // sure they're all non-negative. If any of them are negative, the overall
+  // address being computed might be before the base address determined by the
+  // first non-zero index.
+  auto AllNonNegative = [&]() -> bool {
+    for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
+      bool KnownNonNegative, KnownNegative;
+      IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
+                        KnownNegative, 0, MemI);
+      if (KnownNonNegative)
+        continue;
+      return false;
+    }
+
+    return true;
+  };
+
+  // FIXME: If the GEP is not inbounds, and there are extra indices after the
+  // one we'll replace, those could cause the address computation to wrap
+  // (rendering the AllNonNegative() check below insufficient). We can do
+  // better, ignoring zero indices (and other indices we can prove small
+  // enough not to wrap).
+  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
+    return false;
+
+  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
+  // also known to be dereferenceable.
+  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
+         AllNonNegative();
+}
+
+// If we're indexing into an object with a variable index for the memory
+// access, but the object has only one element, we can assume that the index
+// will always be zero. If we replace the GEP, return it.
+template <typename T>
+static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
+                                          T &MemI) {
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
+    unsigned Idx;
+    if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
+      Instruction *NewGEPI = GEPI->clone();
+      NewGEPI->setOperand(Idx,
+        ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
+      NewGEPI->insertBefore(GEPI);
+      MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
+      return NewGEPI;
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   Value *Op = LI.getOperand(0);
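
To make the new fold concrete, here is a hand-written before/after sketch (an illustration only, not part of the patch; it reuses @f.a from the comment above and the test below):

  ; Before: %x is a variable index, but @f.a has exactly one element, so any
  ; value other than 0 would make this inbounds GEP (and the load through it)
  ; undefined.
  %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
  %v = load i32* %arrayidx, align 4

  ; After replaceGEPIdxWithZero: the variable index becomes the constant 0,
  ; and the load of the constant initializer can then fold to i32 12.
  %arrayidx.0 = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 0
  %v.0 = load i32* %arrayidx.0, align 4
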
@@ -489,6 +662,12 @@
     LI.setAlignment(EffectiveLoadAlign);
   }
 
+  // Replace GEP indices if possible.
+  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
+    Worklist.Add(NewGEPI);
+    return &LI;
+  }
+
   // None of the following transforms are legal for volatile/atomic loads.
   // FIXME: Some of it is okay for atomic loads; needs refactoring.
   if (!LI.isSimple()) return nullptr;
@@ -663,6 +842,12 @@
     SI.setAlignment(EffectiveStoreAlign);
   }
 
+  // Replace GEP indices if possible.
+  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
+    Worklist.Add(NewGEPI);
+    return &SI;
+  }
+
   // Don't hack volatile/atomic stores.
   // FIXME: Some bits are legal for atomic stores; needs refactoring.
   if (!SI.isSimple()) return nullptr;
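
The load-cmp.ll changes below follow directly from the new fold. Under the P32 DataLayout, the variable outer index into the single-element global @GS can only be 0, so each P32 run now constant-folds the whole load-and-compare sequence rather than keeping the GEP. Roughly (a sketch, assuming %Foo and @GS as defined in that test file):

  %p = getelementptr inbounds %Foo* @GS, i32 %x, i32 0 ; %x must be 0
  %q = load i32* %p      ; folds to the first field of @GS's initializer
  %r = icmp eq i32 %q, 9 ; folds to false
  ret i1 %r              ; hence the updated check lines: ret i1 false
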
Index: test/Transforms/InstCombine/load-cmp.ll
===================================================================
--- test/Transforms/InstCombine/load-cmp.ll
+++ test/Transforms/InstCombine/load-cmp.ll
@@ -230,7 +230,7 @@
 ; NODL: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
 
 ; P32-LABEL: @test10_struct(
-; P32: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i32 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 9
@@ -256,8 +256,7 @@
 ; NODL: getelementptr inbounds %Foo* @GS, i16 %x, i32 0
 
 ; P32-LABEL: @test10_struct_i16(
-; P32: %1 = sext i16 %x to i32
-; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i16 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 0
@@ -271,8 +270,7 @@
 ; NODL: getelementptr inbounds %Foo* @GS, i64 %x, i32 0
 
 ; P32-LABEL: @test10_struct_i64(
-; P32: %1 = trunc i64 %x to i32
-; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i64 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 0
Index: test/Transforms/InstCombine/mem-gep-zidx.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/mem-gep-zidx.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+@f.b = private unnamed_addr constant [1 x i32] [i32 55], align 4
+
+define signext i32 @test1(i32 signext %x) #0 {
+entry:
+  %idxprom = sext i32 %x to i64
+  %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @test1
+; CHECK: ret i32 12
+}
+
+declare void @foo(i64* %p)
+define void @test2(i32 signext %x, i64 %v) #0 {
+entry:
+  %p = alloca i64
+  %idxprom = sext i32 %x to i64
+  %arrayidx = getelementptr inbounds i64* %p, i64 %idxprom
+  store i64 %v, i64* %arrayidx
+  call void @foo(i64* %p)
+  ret void
+
+; CHECK-LABEL: @test2
+; CHECK: %p = alloca i64
+; CHECK: store i64 %v, i64* %p
+; CHECK: ret void
+}
+
+define signext i32 @test3(i32 signext %x, i1 %y) #0 {
+entry:
+  %idxprom = sext i32 %x to i64
+  %p = select i1 %y, [1 x i32]* @f.a, [1 x i32]* @f.b
+  %arrayidx = getelementptr inbounds [1 x i32]* %p, i64 0, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @test3
+; CHECK: getelementptr inbounds [1 x i32]* %p, i64 0, i64 0
+}
+
+attributes #0 = { nounwind readnone }
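
For contrast, a negative example (hand-written; @g.ext and @no_fold are hypothetical names, not part of the tests above). A global that is only declared has no definitive initializer, so isObjectSizeLessThanOrEq cannot bound the object's size and the fold does not fire:

@g.ext = external global [1 x i32]

define i32 @no_fold(i64 %x) {
entry:
  ; Not folded: @g.ext may be defined elsewhere with a larger size than its
  ; declared type suggests, so a non-zero %x is not provably undefined here.
  %arrayidx = getelementptr inbounds [1 x i32]* @g.ext, i64 0, i64 %x
  %0 = load i32* %arrayidx, align 4
  ret i32 %0
}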