Index: include/llvm/IR/DataLayout.h
===================================================================
--- include/llvm/IR/DataLayout.h
+++ include/llvm/IR/DataLayout.h
@@ -453,6 +453,12 @@
   /// are set.
   unsigned getLargestLegalIntTypeSizeInBits() const;
 
+  /// \brief Returns the type of a GEP index.
+  /// It is the smaller of IntPtrType and the largest legal integer type;
+  /// we cannot always use IntPtrType for the index, because the target
+  /// may not support full arithmetic at the pointer width.
+  Type *getIndexType(Type *PtrTy) const;
+
   /// \brief Returns the offset from the beginning of the type for the specified
   /// indices.
   ///
Index: lib/Analysis/ConstantFolding.cpp
===================================================================
--- lib/Analysis/ConstantFolding.cpp
+++ lib/Analysis/ConstantFolding.cpp
@@ -730,8 +730,8 @@
 Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
                          Type *ResultTy, Optional<unsigned> InRangeIndex,
                          const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  Type *IntPtrTy = DL.getIntPtrType(ResultTy);
-  Type *IntPtrScalarTy = IntPtrTy->getScalarType();
+  Type *IndexTy = DL.getIndexType(ResultTy);
+  Type *IndexScalarTy = IndexTy->getScalarType();
 
   bool Any = false;
   SmallVector<Constant *, 32> NewIdxs;
@@ -739,11 +739,11 @@
     if ((i == 1 ||
          !isa<StructType>(GetElementPtrInst::getIndexedType(
              SrcElemTy, Ops.slice(1, i - 1)))) &&
-        Ops[i]->getType()->getScalarType() != IntPtrScalarTy) {
+        Ops[i]->getType()->getScalarType() != IndexScalarTy) {
       Any = true;
       Type *NewType = Ops[i]->getType()->isVectorTy()
-                          ? IntPtrTy
-                          : IntPtrTy->getScalarType();
+                          ? IndexTy
+                          : IndexScalarTy;
       NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
                                                                       true,
                                                                       NewType,
@@ -803,33 +803,37 @@
   if (!Ptr->getType()->isPointerTy())
     return nullptr;
 
+  Type *IndexTy = DL.getIndexType(Ptr->getType());
   Type *IntPtrTy = DL.getIntPtrType(Ptr->getType());
 
   // If this is a constant expr gep that is effectively computing an
   // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
-  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
-    if (!isa<ConstantInt>(Ops[i])) {
-
-      // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
-      // "inttoptr (sub (ptrtoint Ptr), V)"
-      if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
-        auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
-        assert((!CE || CE->getType() == IntPtrTy) &&
-               "CastGEPIndices didn't canonicalize index types!");
-        if (CE && CE->getOpcode() == Instruction::Sub &&
-            CE->getOperand(0)->isNullValue()) {
-          Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
-          Res = ConstantExpr::getSub(Res, CE->getOperand(1));
-          Res = ConstantExpr::getIntToPtr(Res, ResTy);
-          if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI))
-            Res = FoldedRes;
-          return Res;
+  // The transformation is legal only if the GEP index and the pointer have the same width.
+  if (IndexTy->getPrimitiveSizeInBits() == IntPtrTy->getPrimitiveSizeInBits()) {
+    for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
+      if (!isa<ConstantInt>(Ops[i])) {
+
+        // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
+        // "inttoptr (sub (ptrtoint Ptr), V)"
+        if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
+          auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
+          assert((!CE || CE->getType() == IntPtrTy) &&
+                 "CastGEPIndices didn't canonicalize index types!");
+          if (CE && CE->getOpcode() == Instruction::Sub &&
+              CE->getOperand(0)->isNullValue()) {
+            Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
+            Res = ConstantExpr::getSub(Res, CE->getOperand(1));
+            Res = ConstantExpr::getIntToPtr(Res, ResTy);
+            if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI))
+              Res = FoldedRes;
+            return Res;
+          }
         }
+        return nullptr;
       }
-      return nullptr;
     }
-
-  unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy);
+  }
+  unsigned BitWidth = DL.getTypeSizeInBits(IndexTy);
   APInt Offset =
       APInt(BitWidth,
             DL.getIndexedOffsetInType(
@@ -909,7 +913,7 @@
         // The element size is 0. This may be [0 x Ty]*, so just use a zero
         // index for this level and proceed to the next level to see if it can
         // accommodate the offset.
-        NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0));
+        NewIdxs.push_back(ConstantInt::get(IndexTy, 0));
       } else {
         // The element size is non-zero divide the offset by the element
         // size (rounding down), to compute the index at this level.
@@ -918,7 +922,7 @@
         if (Overflow)
           break;
         Offset -= NewIdx * ElemSize;
-        NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
+        NewIdxs.push_back(ConstantInt::get(IndexTy, NewIdx));
       }
     } else {
       auto *STy = cast<StructType>(Ty);
Index: lib/IR/DataLayout.cpp
===================================================================
--- lib/IR/DataLayout.cpp
+++ lib/IR/DataLayout.cpp
@@ -726,6 +726,19 @@
   return Max != LegalIntWidths.end() ? *Max : 0;
 }
 
+Type *DataLayout::getIndexType(Type *Ty) const {
+  assert(Ty->isPtrOrPtrVectorTy() &&
+         "Expected a pointer or pointer vector type.");
+  unsigned PointerSize = getPointerTypeSizeInBits(Ty);
+  unsigned LargestIntSize = getLargestLegalIntTypeSizeInBits();
+  unsigned NumBits =
+      LargestIntSize ? std::min(PointerSize, LargestIntSize) : PointerSize;
+  IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits);
+  if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
+    return VectorType::get(IntTy, VecTy->getNumElements());
+  return IntTy;
+}
+
 int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
                                            ArrayRef<Value *> Indices) const {
   int64_t Result = 0;
Index: lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -681,8 +681,7 @@
   // 3. Add the edges for the PHI nodes.
   // 4. Emit GEPs to get the original pointers.
   // 5. Remove the original instructions.
-  Type *IndexType = IntegerType::get(
-      Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType()));
+  Type *IndexType = DL.getIndexType(Start->getType());
 
   DenseMap<Value *, Value *> NewInsts;
   NewInsts[Base] = ConstantInt::getNullValue(IndexType);
@@ -789,8 +788,7 @@
 /// the GEPs Pointer and Index.
 static std::pair<Value *, Value *>
 getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
-  Type *IndexType = IntegerType::get(V->getContext(),
-                                     DL.getPointerTypeSizeInBits(V->getType()));
+  Type *IndexType = DL.getIndexType(V->getType());
 
   Constant *Index = ConstantInt::getNullValue(IndexType);
   while (true) {
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1107,7 +1107,7 @@
   // Start with the index over the outer type.  Note that the type size
   // might be zero (even if the offset isn't zero) if the indexed type
   // is something like [0 x {int, int}]
-  Type *IntPtrTy = DL.getIntPtrType(PtrTy);
+  Type *IndexTy = DL.getIndexType(PtrTy);
   int64_t FirstIdx = 0;
   if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
     FirstIdx = Offset/TySize;
@@ -1122,7 +1122,7 @@
     assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
   }
 
-  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+  NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx));
 
   // Index into the types.  If we fail, set OrigBase to null.
   while (Offset) {
@@ -1144,7 +1144,7 @@
     } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
       uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
       assert(EltSize && "Cannot index into a zero-sized array");
-      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+      NewIndices.push_back(ConstantInt::get(IndexTy,Offset/EltSize));
       Offset %= EltSize;
       Ty = AT->getElementType();
     } else {
@@ -1507,8 +1507,11 @@
   // Eliminate unneeded casts for indices, and replace indices which displace
   // by multiples of a zero size type with zero.
   bool MadeChange = false;
-  Type *IntPtrTy =
-    DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType());
+
+  // The index type may be narrower than the pointer type; the data layout
+  // chooses the right width based on the supported integer types.
+  Type *NewScalarIndexTy =
+      DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
 
   gep_type_iterator GTI = gep_type_begin(GEP);
   for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
@@ -1517,10 +1520,11 @@
     if (GTI.isStruct())
       continue;
 
-    // Index type should have the same width as IntPtr
     Type *IndexTy = (*I)->getType();
-    Type *NewIndexType = IndexTy->isVectorTy() ?
-      VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy;
+    Type *NewIndexType =
+        IndexTy->isVectorTy()
+            ? VectorType::get(NewScalarIndexTy, IndexTy->getVectorNumElements())
+            : NewScalarIndexTy;
 
     // If the element type has zero size then any index over it is equivalent
     // to an index of zero, so replace it with zero if it is not zero already.
@@ -1849,7 +1853,7 @@
     if (SrcElTy->isArrayTy() &&
         DL.getTypeAllocSize(SrcElTy->getArrayElementType()) ==
            DL.getTypeAllocSize(ResElTy)) {
-      Type *IdxType = DL.getIntPtrType(GEP.getType());
+      Type *IdxType = DL.getIndexType(GEP.getType());
       Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
       Value *NewGEP =
           GEP.isInBounds()
@@ -1876,10 +1880,11 @@
         unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
         uint64_t Scale = SrcSize / ResSize;
 
-        // Earlier transforms ensure that the index has type IntPtrType, which
-        // considerably simplifies the logic by eliminating implicit casts.
-        assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
-               "Index not cast to pointer width?");
+        // Earlier transforms ensure that the index has the right type
+        // according to the data layout, which considerably simplifies the
+        // logic by eliminating implicit casts.
+        assert(Idx->getType() == DL.getIndexType(GEP.getType()) &&
+               "Index type does not match the Data Layout preferences");
 
         bool NSW;
         if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
@@ -1915,19 +1920,19 @@
         unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
         uint64_t Scale = ArrayEltSize / ResSize;
 
-        // Earlier transforms ensure that the index has type IntPtrType, which
-        // considerably simplifies the logic by eliminating implicit casts.
-        assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
-               "Index not cast to pointer width?");
+        // Earlier transforms ensure that the index has the right type
+        // according to the data layout, which considerably simplifies
+        // the logic by eliminating implicit casts.
+        assert(Idx->getType() == DL.getIndexType(GEP.getType()) &&
+               "Index type does not match the Data Layout preferences");
 
         bool NSW;
         if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
           // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
           // If the multiplication NewIdx * Scale may overflow then the new
           // GEP may not be "inbounds".
-          Value *Off[2] = {
-              Constant::getNullValue(DL.getIntPtrType(GEP.getType())),
-              NewIdx};
+          Type *IndTy = DL.getIndexType(GEP.getType());
+          Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
 
           Value *NewGEP = GEP.isInBounds() && NSW
                               ? Builder.CreateInBoundsGEP(
Index: test/Transforms/InstCombine/gep-custom-dl.ll
===================================================================
--- test/Transforms/InstCombine/gep-custom-dl.ll
+++ test/Transforms/InstCombine/gep-custom-dl.ll
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:m-p:40:64:64-i32:32-i16:16-i8:8-n32"
+
+%struct.B = type { double }
+%struct.A = type { %struct.B, i32, i32 }
+%struct.C = type { [7 x i8] }
+
+
+@Global = constant [10 x i8] c"helloworld"
+
+
+; Test that two array indexing geps fold
+define i32* @test1(i32* %I) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, i32* [[I:%.*]], i32 21
+; CHECK-NEXT:    ret i32* [[B]]
+;
+  %A = getelementptr i32, i32* %I, i8 17
+  %B = getelementptr i32, i32* %A, i16 4
+  ret i32* %B
+}
+
+; Test that two getelementptr insts fold
+define i32* @test2({ i32 }* %I) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[B:%.*]] = getelementptr { i32 }, { i32 }* [[I:%.*]], i32 1, i32 0
+; CHECK-NEXT:    ret i32* [[B]]
+;
+  %A = getelementptr { i32 }, { i32 }* %I, i32 1
+  %B = getelementptr { i32 }, { i32 }* %A, i32 0, i32 0
+  ret i32* %B
+}
+
+define void @test3(i8 %B) {
+; This should be turned into a constexpr instead of being an instruction
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    store i8 [[B:%.*]], i8* getelementptr inbounds ([10 x i8], [10 x i8]* @Global, i32 0, i32 4), align 1
+; CHECK-NEXT:    ret void
+;
+  %A = getelementptr [10 x i8], [10 x i8]* @Global, i32 0, i32 4
+  store i8 %B, i8* %A
+  ret void
+}
+
+%as1_ptr_struct = type { i32 addrspace(1)* }
+%as2_ptr_struct = type { i32 addrspace(2)* }
+
+@global_as2 = addrspace(2) global i32 zeroinitializer
+@global_as1_as2_ptr = addrspace(1) global %as2_ptr_struct { i32 addrspace(2)* @global_as2 }
+
+; This should be turned into a constexpr instead of being an instruction
+define void @test_evaluate_gep_nested_as_ptrs(i32 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_nested_as_ptrs(
+; CHECK-NEXT:    store i32 addrspace(2)* [[B:%.*]], i32 addrspace(2)* addrspace(1)* getelementptr inbounds (%as2_ptr_struct, [[AS2_PTR_STRUCT:%.*]] addrspace(1)* @global_as1_as2_ptr, i32 0, i32 0), align 8
+; CHECK-NEXT:    ret void
+;
+  %A = getelementptr %as2_ptr_struct, %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i32 0, i32 0
+  store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* %A
+  ret void
+}
+
+@arst = addrspace(1) global [4 x i8 addrspace(2)*] zeroinitializer
+
+define void @test_evaluate_gep_as_ptrs_array(i8 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_as_ptrs_array(
+; CHECK-NEXT:    store i8 addrspace(2)* [[B:%.*]], i8 addrspace(2)* addrspace(1)* getelementptr inbounds ([4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i32 0, i32 2), align 16
+; CHECK-NEXT:    ret void
+;
+
+  %A = getelementptr [4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2
+  store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* %A
+  ret void
+}
+
+define i32* @test4(i32* %I, i32 %C, i32 %D) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, i32* [[I:%.*]], i32 [[C:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, i32* [[A]], i32 [[D:%.*]]
+; CHECK-NEXT:    ret i32* [[B]]
+;
+  %A = getelementptr i32, i32* %I, i32 %C
+  %B = getelementptr i32, i32* %A, i32 %D
+  ret i32* %B
+}
+
+
+define i1 @test5({ i32, i32 }* %x, { i32, i32 }* %y) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[TMP_4:%.*]] = icmp eq { i32, i32 }* [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP_4]]
+;
+  %tmp.1 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
+  %tmp.3 = getelementptr { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1
+  ;; seteq x, y
+  %tmp.4 = icmp eq i32* %tmp.1, %tmp.3
+  ret i1 %tmp.4
+}
+
+%S = type { i32, [ 100 x i32] }
+
+define <2 x i1> @test6(<2 x i32> %X, <2 x %S*> %P) nounwind {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %A = getelementptr inbounds %S, <2 x %S*> %P, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 1>, <2 x i32> %X
+  %B = getelementptr inbounds %S, <2 x %S*> %P, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 0, i32 0>
+  %C = icmp eq <2 x i32*> %A, %B
+  ret <2 x i1> %C
+}
+
+@G = external global [3 x i8]
+define i8* @test7(i16 %Idx) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[ZE_IDX:%.*]] = zext i16 [[IDX:%.*]] to i32
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [3 x i8], [3 x i8]* @G, i32 0, i32 [[ZE_IDX]]
+; CHECK-NEXT:    ret i8* [[TMP]]
+;
+  %ZE_Idx = zext i16 %Idx to i32
+  %tmp = getelementptr i8, i8* getelementptr ([3 x i8], [3 x i8]* @G, i32 0, i32 0), i32 %ZE_Idx
+  ret i8* %tmp
+}
+
+
+; Test folding of constantexpr geps into normal geps.
+@Array = external global [40 x i32]
+define i32 *@test8(i32 %X) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[A:%.*]] = getelementptr [40 x i32], [40 x i32]* @Array, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    ret i32* [[A]]
+;
+  %A = getelementptr i32, i32* getelementptr ([40 x i32], [40 x i32]* @Array, i32 0, i32 0), i32 %X
+  ret i32* %A
+}
+
+define i32 *@test9(i32 *%base, i8 %ind) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[IND:%.*]] to i32
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[TMP1]]
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %res = getelementptr i32, i32 *%base, i8 %ind
+  ret i32* %res
+}
+
+define i32 @test10() {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    ret i32 8
+;
+  %A = getelementptr { i32, double }, { i32, double }* null, i32 0, i32 1
+  %B = ptrtoint double* %A to i32
+  ret i32 %B
+}
Index: test/Transforms/InstCombine/memcmp-constant-fold.ll
===================================================================
--- test/Transforms/InstCombine/memcmp-constant-fold.ll
+++ test/Transforms/InstCombine/memcmp-constant-fold.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S -data-layout=e-n32 | FileCheck %s --check-prefix=ALL --check-prefix=LE
 ; RUN: opt < %s -instcombine -S -data-layout=E-n32 | FileCheck %s --check-prefix=ALL --check-prefix=BE
 
@@ -54,7 +55,7 @@
 
 define i1 @memcmp_3bytes_aligned_constant_i32(i8* align 4 %x) {
 ; ALL-LABEL: @memcmp_3bytes_aligned_constant_i32(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* bitcast (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @intbuf, i64 0, i64 1) to i8*), i8* bitcast ([2 x i32]* @intbuf to i8*), i64 3)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* bitcast (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @intbuf, i32 0, i32 1) to i8*), i8* bitcast ([2 x i32]* @intbuf to i8*), i64 3)
 ; ALL-NEXT:    [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    ret i1 [[CMPEQ0]]
 ;
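For reference, a minimal standalone sketch (not part of the patch) of what the new DataLayout::getIndexType returns for the custom data layout exercised by gep-custom-dl.ll: pointers are 40 bits wide, but the largest legal integer type is i32 (the "n32" component), so GEP indices are canonicalized to i32 rather than to the pointer-width i40. The main() driver and header set below are illustrative only; the sketch assumes a build that links against LLVM with this change applied.

// getindextype_sketch.cpp (hypothetical file name, for illustration only)
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Same data layout string as the new test: 40-bit pointers, n32.
  DataLayout DL("e-m:m-p:40:64:64-i32:32-i16:16-i8:8-n32");

  Type *PtrTy = Type::getInt8PtrTy(Ctx);    // i8* in address space 0
  Type *IdxTy = DL.getIndexType(PtrTy);     // API added by this patch
  Type *IntPtrTy = DL.getIntPtrType(PtrTy); // pointer-width integer

  // Expected: the index type is i32, i.e. min(pointer width 40, largest
  // legal integer width 32), while the IntPtr type remains i40.
  IdxTy->print(outs());
  outs() << "\n";
  IntPtrTy->print(outs());
  outs() << "\n";
  return 0;
}

This is also why the new gep-custom-dl.ll checks expect indices like "i32 21" and why memcmp-constant-fold.ll under -data-layout=e-n32 now folds the constant GEP with i32 rather than i64 indices.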