diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -571,6 +571,11 @@ /// are set. unsigned getLargestLegalIntTypeSizeInBits() const; + /// Returns the type of a GEP index in AddressSpace. + /// If it was not specified explicitly, it will be the integer type of the + /// pointer width - IntPtrType. + IntegerType *getIndexType(LLVMContext &C, unsigned AddressSpace) const; + /// Returns the type of a GEP index. /// If it was not specified explicitly, it will be the integer type of the /// pointer width - IntPtrType. diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -567,6 +567,12 @@ return DL.getIntPtrType(Context, AddrSpace); } + /// Fetch the type of an integer that should be used to index GEP operations + /// within AddressSpace. + IntegerType *getIndexTy(const DataLayout &DL, unsigned AddrSpace) { + return DL.getIndexType(Context, AddrSpace); + } + //===--------------------------------------------------------------------===// // Intrinsic creation methods //===--------------------------------------------------------------------===// diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -1099,12 +1099,13 @@ // must be a VLA assert(I.isArrayAllocation()); - // If needed, adjust the alloca's operand size to match the pointer size. - // Subsequent math operations expect the types to match. + // If needed, adjust the alloca's operand size to match the pointer indexing + // size. Subsequent math operations expect the types to match. Value *ArraySize = Builder.CreateZExtOrTrunc( - I.getArraySize(), DL.getIntPtrType(I.getContext())); + I.getArraySize(), + DL.getIndexType(I.getContext(), DL.getAllocaAddrSpace())); assert(ArraySize->getType() == Zero->getType() && - "Expected zero constant to have pointer type"); + "Expected zero constant to have pointer index type"); Value *Size = ConstantInt::get(ArraySize->getType(), DL.getTypeAllocSize(I.getAllocatedType())); diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -885,6 +885,11 @@ return Max != LegalIntWidths.end() ? *Max : 0; } +IntegerType *DataLayout::getIndexType(LLVMContext &C, + unsigned AddressSpace) const { + return IntegerType::get(C, getIndexSizeInBits(AddressSpace)); +} + Type *DataLayout::getIndexType(Type *Ty) const { assert(Ty->isPtrOrPtrVectorTy() && "Expected a pointer or pointer vector type."); diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -686,7 +686,7 @@ } if (auto GEP = dyn_cast<GEPOperator>(V)) { - APInt APOffset(DL.getPointerSizeInBits(0), 0); + APInt APOffset(DL.getIndexSizeInBits(0), 0); bool Result = GEP->accumulateConstantOffset(DL, APOffset); if (!Result) return false; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -276,14 +276,14 @@ // order the state machines in complexity of the generated code.
Value *Idx = GEP->getOperand(2); - // If the index is larger than the pointer size of the target, truncate the - // index down like the GEP would do implicitly. We don't have to do this for - // an inbounds GEP because the index can't be out of range. + // If the index is larger than the pointer offset size of the target, truncate + // the index down like the GEP would do implicitly. We don't have to do this + // for an inbounds GEP because the index can't be out of range. if (!GEP->isInBounds()) { - Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); - unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); - if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > PtrSize) - Idx = Builder.CreateTrunc(Idx, IntPtrTy); + Type *PtrIdxTy = DL.getIndexType(GEP->getType()); + unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth(); + if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize) + Idx = Builder.CreateTrunc(Idx, PtrIdxTy); } // If inbounds keyword is not present, Idx * ElementSize can overflow. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -219,7 +219,7 @@ // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... // - Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType()); + Type *IdxTy = IC.getDataLayout().getIndexType(AI.getType()); Value *NullIdx = Constant::getNullValue(IdxTy); Value *Idx[2] = {NullIdx, NullIdx}; Instruction *GEP = GetElementPtrInst::CreateInBounds( @@ -235,11 +235,12 @@ if (isa<UndefValue>(AI.getArraySize())) return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - // Ensure that the alloca array size argument has type intptr_t, so that - // any casting is exposed early. - Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType()); - if (AI.getArraySize()->getType() != IntPtrTy) { - Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false); + // Ensure that the alloca array size argument has type equal to the offset + // size of the alloca() pointer, which, in the typical case, is intptr_t, + // so that any casting is exposed early.
+ Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType()); + if (AI.getArraySize()->getType() != PtrIdxTy) { + Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false); return IC.replaceOperand(AI, 0, V); } diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp --- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -71,8 +71,8 @@ Value *Offset = SizeOffset.second; ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); - Type *IntTy = DL.getIntPtrType(Ptr->getType()); - Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); + Type *IndexTy = DL.getIndexType(Ptr->getType()); + Value *NeededSizeVal = ConstantInt::get(IndexTy, NeededSize); auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size)); auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset)); @@ -97,7 +97,7 @@ Value *Or = IRB.CreateOr(Cmp2, Cmp3); if ((!SizeCI || SizeCI->getValue().slt(0)) && !SizeRange.getSignedMin().isNonNegative()) { - Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); + Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0)); Or = IRB.CreateOr(Cmp1, Or); } diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -410,8 +410,8 @@ // Get offset from the base GV. PointerType *GVPtrTy = cast<PointerType>(BaseGV->getType()); - IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace()); - APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true); + IntegerType *OffsetTy = DL->getIndexType(*Ctx, GVPtrTy->getAddressSpace()); + APInt Offset(DL->getTypeSizeInBits(OffsetTy), /*val*/ 0, /*isSigned*/ true); auto *GEPO = cast<GEPOperator>(ConstExpr); // TODO: If we have a mix of inbounds and non-inbounds GEPs, then basing a @@ -432,7 +432,7 @@ // to be cheaper than compute it by <Base + Offset>, which can be lowered to // an ADD instruction or folded into Load/Store instruction.
InstructionCost Cost = - TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy, + TTI->getIntImmCostInst(Instruction::Add, 1, Offset, OffsetTy, TargetTransformInfo::TCK_SizeAndLatency, Inst); ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV]; ConstCandMapType::iterator Itr; diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -157,7 +157,7 @@ return {}; } - APInt Offset = APInt(DL.getPointerTypeSizeInBits(Addr->getType()), 0); + APInt Offset = APInt(DL.getIndexTypeSizeInBits(Addr->getType()), 0); Value *Base = Addr; auto *GEP = dyn_cast<GetElementPtrInst>(Addr); if (GEP) { diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp --- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp @@ -351,9 +351,9 @@ bool NaryReassociatePass::requiresSignExtension(Value *Index, GetElementPtrInst *GEP) { - unsigned PointerSizeInBits = - DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace()); - return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits; + unsigned IndexSizeInBits = + DL->getIndexSizeInBits(GEP->getType()->getPointerAddressSpace()); + return cast<IntegerType>(Index->getType())->getBitWidth() < IndexSizeInBits; } GetElementPtrInst * @@ -449,12 +449,12 @@ return nullptr; // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0]))); - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); - if (RHS->getType() != IntPtrTy) - RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy); + Type *PtrIdxTy = DL->getIndexType(GEP->getType()); + if (RHS->getType() != PtrIdxTy) + RHS = Builder.CreateSExtOrTrunc(RHS, PtrIdxTy); if (IndexedSize != ElementSize) { RHS = Builder.CreateMul( - RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize)); + RHS, ConstantInt::get(PtrIdxTy, IndexedSize / ElementSize)); } GetElementPtrInst *NewGEP = cast<GetElementPtrInst>( Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS)); diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -428,7 +428,7 @@ /// Returns true if the module changes. /// /// Verified in @i32_add in split-gep.ll - bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP); + bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP); /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow. /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting @@ -791,17 +791,17 @@ .getSExtValue(); } -bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize( +bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize( GetElementPtrInst *GEP) { bool Changed = false; - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + Type *PtrIdxTy = DL->getIndexType(GEP->getType()); gep_type_iterator GTI = gep_type_begin(*GEP); for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E; ++I, ++GTI) { // Skip struct member indices which must be i32.
if (GTI.isSequential()) { - if ((*I)->getType() != IntPtrTy) { - *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP); + if ((*I)->getType() != PtrIdxTy) { + *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom", GEP); Changed = true; } } @@ -849,7 +849,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) { IRBuilder<> Builder(Variadic); - Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); + Type *PtrIndexTy = DL->getIndexType(Variadic->getType()); Type *I8PtrTy = Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace()); @@ -875,15 +875,16 @@ if (CI->isZero()) continue; - APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), + APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(), DL->getTypeAllocSize(GTI.getIndexedType())); // Scale the index by element size. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { Idx = Builder.CreateShl( - Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2())); + Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2())); } else { - Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize)); + Idx = + Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize)); } } // Create an ugly GEP with a single index for each index. @@ -896,7 +897,7 @@ // Create a GEP with the constant offset index. if (AccumulativeByteOffset != 0) { - Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); + Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset); ResultPtr = Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); } else @@ -922,6 +923,9 @@ int64_t AccumulativeByteOffset) { IRBuilder<> Builder(Variadic); Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); + assert(IntPtrTy == DL->getIndexType(Variadic->getType()) && + "Pointer type must match index type for arithmetic-based lowering of " + "split GEPs"); Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy); gep_type_iterator GTI = gep_type_begin(*Variadic); @@ -973,7 +977,7 @@ if (GEP->hasAllConstantIndices()) return false; - bool Changed = canonicalizeArrayIndicesToPointerSize(GEP); + bool Changed = canonicalizeArrayIndicesToIndexSize(GEP); bool NeedsExtraction; int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); @@ -1057,7 +1061,15 @@ if (LowerGEP) { // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to // arithmetic operations if the target uses alias analysis in codegen. - if (TTI.useAA()) + // Additionally, pointers that aren't integral (and so can't be safely + // converted to integers) or those whose offset size is different from their + // pointer size (which means that doing integer arithmetic on them could + // affect that data) can't be lowered in this way. + unsigned AddrSpace = GEP->getPointerAddressSpace(); + bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) != + DL->getIndexSizeInBits(AddrSpace); + if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) || + PointerHasExtraData) lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset); else lowerToArithmetics(GEP, AccumulativeByteOffset); @@ -1104,13 +1116,13 @@ // used with unsigned integers later. int64_t ElementTypeSizeOfGEP = static_cast( DL->getTypeAllocSize(GEP->getResultElementType())); - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + Type *PtrIdxTy = DL->getIndexType(GEP->getType()); if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) { // Very likely. 
As long as %gep is naturally aligned, the byte offset we // extracted should be a multiple of sizeof(*%gep). int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP; NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, - ConstantInt::get(IntPtrTy, Index, true), + ConstantInt::get(PtrIdxTy, Index, true), GEP->getName(), GEP); NewGEP->copyMetadata(*GEP); // Inherit the inbounds attribute of the original GEP. @@ -1136,7 +1148,7 @@ NewGEP = cast<GetElementPtrInst>(Builder.CreateGEP( Builder.getInt8Ty(), Builder.CreateBitCast(NewGEP, I8PtrTy), - {ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true)}, "uglygep", + {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep", GEPWasInBounds)); NewGEP->copyMetadata(*GEP); diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp --- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -484,9 +484,9 @@ // = B + (sext(Idx) * sext(S)) * ElementSize // = B + (sext(Idx) * ElementSize) * sext(S) // Casting to IntegerType is safe because we skipped vector GEPs. - IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType())); + IntegerType *PtrIdxTy = cast<IntegerType>(DL->getIndexType(I->getType())); ConstantInt *ScaledIdx = ConstantInt::get( - IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true); + PtrIdxTy, Idx->getSExtValue() * (int64_t)ElementSize, true); allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I); } @@ -549,18 +549,18 @@ Value *ArrayIdx = GEP->getOperand(I); uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); if (ArrayIdx->getType()->getIntegerBitWidth() <= - DL->getPointerSizeInBits(GEP->getAddressSpace())) { - // Skip factoring if ArrayIdx is wider than the pointer size, because - // ArrayIdx is implicitly truncated to the pointer size. + DL->getIndexSizeInBits(GEP->getAddressSpace())) { + // Skip factoring if ArrayIdx is wider than the index size, because + // ArrayIdx is implicitly truncated to the index size. factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP); } // When ArrayIdx is the sext of a value, we try to factor that value as // well. Handling this case is important because array indices are - // typically sign-extended to the pointer size. + // typically sign-extended to the pointer index size. Value *TruncatedArrayIdx = nullptr; if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))) && TruncatedArrayIdx->getType()->getIntegerBitWidth() <= - DL->getPointerSizeInBits(GEP->getAddressSpace())) { + DL->getIndexSizeInBits(GEP->getAddressSpace())) { // Skip factoring if TruncatedArrayIdx is wider than the pointer size, // because TruncatedArrayIdx is implicitly truncated to the pointer size. factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP); @@ -675,24 +675,24 @@ } case Candidate::GEP: { - Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); - bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); - if (BumpWithUglyGEP) { - // C = (char *)Basis + Bump - unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); - Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); - Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); - Reduced = - Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds); - Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); - } else { - // C = gep Basis, Bump - // Canonicalize bump to pointer size.
- Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); - Reduced = Builder.CreateGEP( - cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), - Basis.Ins, Bump, "", InBounds); - } + Type *OffsetTy = DL->getIndexType(C.Ins->getType()); + bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); + if (BumpWithUglyGEP) { + // C = (char *)Basis + Bump + unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); + Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); + Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); + Reduced = + Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds); + Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); + } else { + // C = gep Basis, Bump + // Canonicalize bump to pointer size. + Bump = Builder.CreateSExtOrTrunc(Bump, OffsetTy); + Reduced = Builder.CreateGEP( + cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), Basis.Ins, + Bump, "", InBounds); + } break; } default: diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -715,8 +715,8 @@ // When we have target data, we can reduce the GEP down to the value in bytes // added to the address. const DataLayout &DL = FnL->getParent()->getDataLayout(); - unsigned BitWidth = DL.getPointerSizeInBits(ASL); - APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + unsigned OffsetBitWidth = DL.getIndexSizeInBits(ASL); + APInt OffsetL(OffsetBitWidth, 0), OffsetR(OffsetBitWidth, 0); if (GEPL->accumulateConstantOffset(DL, OffsetL) && GEPR->accumulateConstantOffset(DL, OffsetR)) return cmpAPInts(OffsetL, OffsetR); diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -323,17 +323,17 @@ DL.getTypeStoreSize(PtrBTy->getScalarType())) return false; - unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); - APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy)); + unsigned PtrOffsetWidth = DL.getIndexSizeInBits(ASA); + APInt Size(PtrOffsetWidth, DL.getTypeStoreSize(PtrATy)); return areConsecutivePointers(PtrA, PtrB, Size); } bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta, unsigned Depth) const { - unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType()); - APInt OffsetA(PtrBitWidth, 0); - APInt OffsetB(PtrBitWidth, 0); + unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType()); + APInt OffsetA(OffsetBitWidth, 0); + APInt OffsetB(OffsetBitWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); diff --git a/llvm/test/Instrumentation/BoundsChecking/simple.ll b/llvm/test/Instrumentation/BoundsChecking/simple.ll --- a/llvm/test/Instrumentation/BoundsChecking/simple.ll +++ b/llvm/test/Instrumentation/BoundsChecking/simple.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=bounds-checking -S | FileCheck %s -target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-p1:16:16:16-p2:64:64:64:48-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@.str = private constant [8 x i8] c"abcdefg\00" @.str_as1 = private addrspace(1) constant [8 x i8] c"abcdefg\00" +@.str_as2 = private addrspace(2) constant [8 x i8] c"abcdefg\00" + declare noalias ptr @malloc(i64) nounwind allocsize(0) declare noalias ptr @calloc(i64, i64) nounwind allocsize(0,1) @@ -145,6 +147,28 @@ ret void } +define void @f5_as2(i32 %x) nounwind {; +; CHECK-LABEL: @f5_as2( +; CHECK-NEXT: [[X_C:%.*]] = sext i32 [[X:%.*]] to i48 +; CHECK-NEXT: [[TMP1:%.*]] = add i48 0, [[X_C]] +; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds [8 x i8], ptr addrspace(2) @.str_as2, i32 0, i32 [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i48 8, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i48 8, [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i48 [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TRAP:%.*]], label [[TMP6:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(2) [[IDX]], align 4 +; CHECK-NEXT: ret void +; CHECK: trap: +; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: unreachable +; + %idx = getelementptr inbounds [8 x i8], ptr addrspace(2) @.str_as2, i32 0, i32 %x + %1 = load i8, ptr addrspace(2) %idx, align 4 + ret void +} + define void @f6(i64 %x) nounwind { ; CHECK-LABEL: @f6( ; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8 diff --git a/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll b/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll --- a/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll +++ b/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll @@ -4,8 +4,8 @@ define void @test_array_alloca_intptr_not_sizet(i64 %size, ptr %dest) { ; CHECK-LABEL: @test_array_alloca_intptr_not_sizet( -; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[SIZE:%.*]] to i128 -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i8, i128 [[TMP1]], align 1, addrspace(7) +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[SIZE:%.*]] to i32 +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i8, i32 [[TMP1]], align 1, addrspace(7) ; CHECK-NEXT: store ptr addrspace(7) [[ALLOCA]], ptr [[DEST:%.*]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll --- a/llvm/test/Transforms/InstCombine/load-cmp.ll +++ b/llvm/test/Transforms/InstCombine/load-cmp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=instcombine -S -data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck %s +; RUN: opt -passes=instcombine -S -data-layout="p:32:32:32-p1:16:16:16-p2:128:128:128:32-n8:16:32:64" < %s | FileCheck %s @G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, i16 73, i16 82, i16 69, i16 68, i16 0] @@ -7,6 +7,9 @@ @G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, i16 73, i16 82, i16 69, i16 68, i16 0] +@G16_as2 = internal addrspace(2) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, + i16 73, i16 82, i16 69, i16 68, i16 0] + @GD = internal constant [6 x double] [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0] @@ -68,6 +71,19 @@ } +define i1 @test1_noinbounds_as2(i64 %x) { +; CHECK-LABEL: @test1_noinbounds_as2( +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9 +; CHECK-NEXT: ret i1 [[R]] +; + %p = getelementptr [10 x i16], ptr addrspace(2) @G16_as2, i16 0, i64 %x + %q = load i16, ptr addrspace(2) %p + %r = icmp eq i16 
%q, 0 + ret i1 %r + +} + define i1 @test2(i32 %X) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 4 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll @@ -1,6 +1,6 @@ ; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:128:128:128:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" ; CHECK-LABEL: @merge_v2i32_v2i32( ; CHECK: load <4 x i32> @@ -72,6 +72,23 @@ ret void } +; CHECK-LABEL: @merge_fat_ptrs( +; CHECK: load <4 x i16> +; CHECK: store <4 x i16> zeroinitializer +define amdgpu_kernel void @merge_fat_ptrs(ptr addrspace(7) nocapture %a, ptr addrspace(7) nocapture readonly %b) #0 { +entry: + %a.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %a, i32 1 + %b.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %b, i32 1 + + %ld.c = load <2 x i16>, ptr addrspace(7) %b, align 4 + %ld.c.idx.1 = load <2 x i16>, ptr addrspace(7) %b.1, align 4 + + store <2 x i16> zeroinitializer, ptr addrspace(7) %a, align 4 + store <2 x i16> zeroinitializer, ptr addrspace(7) %a.1, align 4 + + ret void +} + ; Ideally this would be merged ; CHECK-LABEL: @merge_load_i32_v2i16( ; CHECK: load i32, diff --git a/llvm/test/Transforms/NaryReassociate/nary-gep.ll b/llvm/test/Transforms/NaryReassociate/nary-gep.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/NaryReassociate/nary-gep.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=nary-reassociate -S | FileCheck %s + +target datalayout = "e-p:64:64-p1:32:32:32-p2:128:128:128:32-i64:64-v16:16-v32:32-n32:64" + +; Even though addrspace(2) has 128-bit pointers, no sign extension needed +; because it has 32-bit offsets.
+define void @no_sext_fat_pointer(ptr addrspace(2) %a, i32 %i, i32 %j) { +; CHECK-LABEL: @no_sext_fat_pointer( +; CHECK-NEXT: [[V2:%.*]] = getelementptr float, ptr addrspace(2) [[A:%.*]], i32 [[I:%.*]] +; CHECK-NEXT: call void @foo(ptr addrspace(2) [[V2]]) +; CHECK-NEXT: [[V3:%.*]] = getelementptr float, ptr addrspace(2) [[V2]], i32 [[J:%.*]] +; CHECK-NEXT: call void @foo(ptr addrspace(2) [[V3]]) +; CHECK-NEXT: ret void +; + %v1 = add i32 %i, %j + %v2 = getelementptr float, ptr addrspace(2) %a, i32 %i + call void @foo(ptr addrspace(2) %v2) + %v3 = getelementptr float, ptr addrspace(2) %a, i32 %v1 + call void @foo(ptr addrspace(2) %v3) + ret void +} + +declare void @foo(ptr addrspace(2)) diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll @@ -4,8 +4,8 @@ define void @should_be_trunc(ptr addrspace(1) %ptr, i64 %index, ptr %result) { ; CHECK-LABEL: @should_be_trunc( -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i64 [[INDEX:%.*]] to i128 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR:%.*]], i128 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[INDEX:%.*]] to i32 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR:%.*]], i32 [[IDXPROM]] ; CHECK-NEXT: store ptr addrspace(1) [[GEP]], ptr [[RESULT:%.*]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll --- a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll +++ b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -passes=slsr,gvn -S | FileCheck %s ; RUN: opt < %s -passes='slsr,gvn' -S | FileCheck %s -target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64-p:64:64:64-p1:32:32:32" +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64-p:64:64:64-p1:32:32:32-p2:128:128:128:32" ; foo(input[0]); ; foo(input[s]); @@ -183,6 +183,23 @@ ret void } +define void @slsr_gep_fat_pointer(ptr addrspace(2) %input, i32 %s) { + ; p1 = &input[s] + %p1 = getelementptr inbounds i32, ptr addrspace(2) %input, i32 %s + call void @baz2(ptr addrspace(2) %p1) + + ; p2 = &input[s * 2] + %s2 = mul nsw i32 %s, 2 + %p2 = getelementptr inbounds i32, ptr addrspace(2) %input, i32 %s2 +; CHECK: %p2 = getelementptr inbounds i32, ptr addrspace(2) %p1, i32 %s + ; Use index bitwidth, not pointer size (i128) + call void @baz2(ptr addrspace(2) %p2) + + ret void +} + + declare void @foo(ptr) declare void @bar(ptr) declare void @baz(ptr addrspace(1)) +declare void @baz2(ptr addrspace(2))
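
For reference, a minimal standalone sketch of the behavior this patch relies on: the new DataLayout::getIndexType(LLVMContext &, unsigned) overload returns the index (offset) width of an address space rather than its full pointer width, which is what GEP indices are implicitly truncated or extended to. The file name and datalayout string below are made up for illustration (they mirror the p2:128:128:128:32 strings used in the tests above), and the program assumes a host linked against LLVM's Core and Support libraries.

// index_type_demo.cpp -- hypothetical example, not part of this change.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Address space 2 uses 128-bit "fat" pointers but only 32-bit offsets.
  DataLayout DL("e-p:64:64:64-p2:128:128:128:32");

  // Pointer width and index width differ in address space 2.
  outs() << "AS2 pointer bits: " << DL.getPointerSizeInBits(2) << "\n"; // 128
  outs() << "AS2 index bits:   " << DL.getIndexSizeInBits(2) << "\n";   // 32

  // getIntPtrType yields i128, while the new getIndexType overload yields
  // i32 -- the type the transforms in this patch now use for GEP indices.
  IntegerType *IntPtrTy = DL.getIntPtrType(Ctx, /*AddressSpace=*/2);
  IntegerType *IdxTy = DL.getIndexType(Ctx, /*AddressSpace=*/2);
  outs() << "getIntPtrType: i" << IntPtrTy->getBitWidth() << "\n"; // i128
  outs() << "getIndexType:  i" << IdxTy->getBitWidth() << "\n";    // i32
  return 0;
}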