Index: ../docs/LangRef.rst
===================================================================
--- ../docs/LangRef.rst
+++ ../docs/LangRef.rst
@@ -1065,7 +1065,7 @@
 for return values.

 .. _attr_align:
-
+
 ``align <n>``
     This indicates that the pointer value may be assumed by the optimizer to
     have the specified alignment.
@@ -1908,10 +1908,12 @@
 ``A<address space>``
     Specifies the address space of objects created by '``alloca``'.
     Defaults to the default address space of 0.
-``p[n]:<size>:<abi>:<pref>``
+``p[n]:<size>:<abi>:<pref>:<idx>``
     This specifies the *size* of a pointer and its ``<abi>`` and
-    ``<pref>``\erred alignments for address space ``n``. All sizes are in
-    bits. The address space, ``n``, is optional, and if not specified,
+    ``<pref>``\erred alignments for address space ``n``. The fourth parameter
+    ``<idx>`` is the size of the index used for address calculation. If not
+    specified, the default index size is equal to the pointer size. All sizes
+    are in bits. The address space, ``n``, is optional, and if not specified,
     denotes the default address space 0. The value of ``n`` must be in
     the range [1,2^23).
 ``i<size>:<abi>:<pref>``
@@ -2281,7 +2283,7 @@
 LLVM IR floating-point operations (:ref:`fadd <i_fadd>`,
 :ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
 :ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`) and :ref:`call <i_call>`
-may use the following flags to enable otherwise unsafe
+may use the following flags to enable otherwise unsafe
 floating-point transformations.

 ``nnan``
@@ -2308,11 +2310,11 @@
 ``afn``
    Approximate functions - Allow substitution of approximate calculations for
-   functions (sin, log, sqrt, etc). See floating-point intrinsic definitions
-   for places where this can apply to LLVM's intrinsic math functions.
+   functions (sin, log, sqrt, etc). See floating-point intrinsic definitions
+   for places where this can apply to LLVM's intrinsic math functions.

 ``reassoc``
-   Allow reassociation transformations for floating-point instructions.
+   Allow reassociation transformations for floating-point instructions.
    This may dramatically change results in floating point.

 ``fast``
@@ -6853,10 +6855,10 @@
 Semantics:
 """"""""""

-Return the same value as a libm '``fmod``' function but without trapping or
+Return the same value as a libm '``fmod``' function but without trapping or
 setting ``errno``.

-The remainder has the same sign as the dividend. This instruction can also
+The remainder has the same sign as the dividend. This instruction can also
 take any number of :ref:`fast-math flags <fastmath>`, which are optimization
 hints to enable otherwise unsafe floating-point optimizations:
@@ -10504,7 +10506,7 @@
 """"""""""

 The '``llvm.memset.*``' intrinsics fill "len" bytes of memory starting
-at the destination location.
+at the destination location.

 '``llvm.sqrt.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10538,10 +10540,10 @@
 """"""""""

 Return the same value as a corresponding libm '``sqrt``' function but without
-trapping or setting ``errno``. For types specified by IEEE-754, the result
+trapping or setting ``errno``. For types specified by IEEE-754, the result
 matches a conforming libm implementation.

-When specified with the fast-math-flag 'afn', the result may be approximated
+When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.

 '``llvm.powi.*``' Intrinsic
@@ -10616,7 +10618,7 @@
 Return the same value as a corresponding libm '``sin``' function but without
 trapping or setting ``errno``.

-When specified with the fast-math-flag 'afn', the result may be approximated
+When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.

 '``llvm.cos.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10653,7 +10655,7 @@
 Return the same value as a corresponding libm '``cos``' function but without
 trapping or setting ``errno``.
-When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.pow.*``' Intrinsic @@ -10691,7 +10693,7 @@ Return the same value as a corresponding libm '``pow``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.exp.*``' Intrinsic @@ -10729,7 +10731,7 @@ Return the same value as a corresponding libm '``exp``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.exp2.*``' Intrinsic @@ -10767,7 +10769,7 @@ Return the same value as a corresponding libm '``exp2``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.log.*``' Intrinsic @@ -10805,7 +10807,7 @@ Return the same value as a corresponding libm '``log``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.log10.*``' Intrinsic @@ -10843,7 +10845,7 @@ Return the same value as a corresponding libm '``log10``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.log2.*``' Intrinsic @@ -10881,7 +10883,7 @@ Return the same value as a corresponding libm '``log2``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.fma.*``' Intrinsic @@ -10918,7 +10920,7 @@ Return the same value as a corresponding libm '``fma``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.fabs.*``' Intrinsic @@ -14558,4 +14560,3 @@ is replaced with an actual element size. The optimizer is allowed to inline the memory assignment when it's profitable to do so. - Index: ../include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- ../include/llvm/CodeGen/BasicTTIImpl.h +++ ../include/llvm/CodeGen/BasicTTIImpl.h @@ -240,7 +240,7 @@ bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); // Early exit if both a jump table and bit test are not allowed. 
- if (N < 1 || (!IsJTAllowed && DL.getPointerSizeInBits() < N)) + if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N)) return N; APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); @@ -254,7 +254,7 @@ } // Check if suitable for a bit test - if (N <= DL.getPointerSizeInBits()) { + if (N <= DL.getIndexSizeInBits(0u)) { SmallPtrSet Dests; for (auto I : SI.cases()) Dests.insert(I.getCaseSuccessor()); Index: ../include/llvm/CodeGen/TargetLowering.h =================================================================== --- ../include/llvm/CodeGen/TargetLowering.h +++ ../include/llvm/CodeGen/TargetLowering.h @@ -812,7 +812,7 @@ bool rangeFitsInWord(const APInt &Low, const APInt &High, const DataLayout &DL) const { // FIXME: Using the pointer type doesn't seem ideal. - uint64_t BW = DL.getPointerSizeInBits(); + uint64_t BW = DL.getIndexSizeInBits(0u); uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; return Range <= BW; } Index: ../include/llvm/IR/DataLayout.h =================================================================== --- ../include/llvm/IR/DataLayout.h +++ ../include/llvm/IR/DataLayout.h @@ -92,10 +92,12 @@ unsigned PrefAlign; uint32_t TypeByteWidth; uint32_t AddressSpace; + uint32_t IndexWidth; /// Initializer static PointerAlignElem get(uint32_t AddressSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth); + unsigned PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth); bool operator==(const PointerAlignElem &rhs) const; }; @@ -165,7 +167,8 @@ unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width, bool ABIAlign, Type *Ty) const; void setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth); + unsigned PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth); /// Internal helper method that returns requested alignment for type. unsigned getAlignment(Type *Ty, bool abi_or_pref) const; @@ -321,6 +324,9 @@ /// the backends/clients are updated. unsigned getPointerSize(unsigned AS = 0) const; + // Index size used for address calculation. + unsigned getIndexSize(unsigned AS) const; + /// Return the address spaces containing non-integral pointers. Pointers in /// this address space don't have a well-defined bitwise representation. ArrayRef getNonIntegralAddressSpaces() const { @@ -345,6 +351,11 @@ return getPointerSize(AS) * 8; } + /// Size in bits of index used for address calculation in getelementptr. + unsigned getIndexSizeInBits(unsigned AS) const { + return getIndexSize(AS) * 8; + } + /// Layout pointer size, in bits, based on the type. If this function is /// called with a pointer type, then the type size of the pointer is returned. /// If this function is called with a vector of pointers, then the type size @@ -352,6 +363,10 @@ /// vector of pointers. unsigned getPointerTypeSizeInBits(Type *) const; + /// Layout size of the index used in GEP calculation. + /// The function should be called with pointer or vector of pointers type. + unsigned getIndexTypeSizeInBits(Type *Ty) const; + unsigned getPointerTypeSize(Type *Ty) const { return getPointerTypeSizeInBits(Ty) / 8; } @@ -453,6 +468,11 @@ /// are set. unsigned getLargestLegalIntTypeSizeInBits() const; + /// \brief Returns the type of a GEP index. + /// If it was not specified explicitly, it will be the integer type of the + /// pointer width - IntPtrType. + Type *getIndexType(Type *PtrTy) const; + /// \brief Returns the offset from the beginning of the type for the specified /// indices. 
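A minimal sketch, not part of the patch, of how the queries declared above behave, assuming an LLVM build that already contains this change; the 40-bit-pointer / 32-bit-index layout string mirrors the one used by the new tests further down:

    // Sketch only: exercises getIndexSizeInBits, getIndexTypeSizeInBits and
    // getIndexType for a "p:40:64:64:32" layout (40-bit pointers whose GEP
    // offsets are computed in 32 bits).
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/Casting.h"
    #include <cassert>

    void indexQueries(llvm::LLVMContext &Ctx) {
      llvm::DataLayout DL("e-p:40:64:64:32-i32:32-n32");

      // The stored pointer is 40 bits wide, but offset arithmetic uses 32 bits.
      assert(DL.getPointerSizeInBits(0) == 40);
      assert(DL.getIndexSizeInBits(0) == 32);

      llvm::Type *I8Ptr = llvm::Type::getInt8PtrTy(Ctx);
      llvm::Type *I8PtrVec = llvm::VectorType::get(I8Ptr, 4);

      // getIndexType mirrors the shape of its argument: a scalar pointer
      // yields i32, a vector of pointers yields a vector of i32.
      assert(DL.getIndexTypeSizeInBits(I8Ptr) == 32);
      assert(DL.getIndexType(I8Ptr)->isIntegerTy(32));
      assert(llvm::cast<llvm::VectorType>(DL.getIndexType(I8PtrVec))
                 ->getElementType()
                 ->isIntegerTy(32));
    }

This is the distinction the rest of the patch relies on: storage size and alignment of a pointer stay tied to ``getPointerSize*``, while anything that feeds address arithmetic switches to the ``getIndex*`` family.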
/// Index: ../lib/Analysis/ConstantFolding.cpp =================================================================== --- ../lib/Analysis/ConstantFolding.cpp +++ ../lib/Analysis/ConstantFolding.cpp @@ -286,7 +286,7 @@ APInt &Offset, const DataLayout &DL) { // Trivial case, constant is the global. if ((GV = dyn_cast(C))) { - unsigned BitWidth = DL.getPointerTypeSizeInBits(GV->getType()); + unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType()); Offset = APInt(BitWidth, 0); return true; } @@ -305,7 +305,7 @@ if (!GEP) return false; - unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType()); + unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); APInt TmpOffset(BitWidth, 0); // If the base isn't a global+constant, we aren't either. @@ -808,26 +808,26 @@ // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' for (unsigned i = 1, e = Ops.size(); i != e; ++i) - if (!isa(Ops[i])) { + if (!isa(Ops[i])) { - // If this is "gep i8* Ptr, (sub 0, V)", fold this as: - // "inttoptr (sub (ptrtoint Ptr), V)" - if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) { - auto *CE = dyn_cast(Ops[1]); - assert((!CE || CE->getType() == IntPtrTy) && - "CastGEPIndices didn't canonicalize index types!"); - if (CE && CE->getOpcode() == Instruction::Sub && - CE->getOperand(0)->isNullValue()) { - Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); - Res = ConstantExpr::getSub(Res, CE->getOperand(1)); - Res = ConstantExpr::getIntToPtr(Res, ResTy); - if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI)) - Res = FoldedRes; - return Res; + // If this is "gep i8* Ptr, (sub 0, V)", fold this as: + // "inttoptr (sub (ptrtoint Ptr), V)" + if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) { + auto *CE = dyn_cast(Ops[1]); + assert((!CE || CE->getType() == IntPtrTy) && + "CastGEPIndices didn't canonicalize index types!"); + if (CE && CE->getOpcode() == Instruction::Sub && + CE->getOperand(0)->isNullValue()) { + Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); + Res = ConstantExpr::getSub(Res, CE->getOperand(1)); + Res = ConstantExpr::getIntToPtr(Res, ResTy); + if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI)) + Res = FoldedRes; + return Res; + } } + return nullptr; } - return nullptr; - } unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy); APInt Offset = Index: ../lib/Analysis/InlineCost.cpp =================================================================== --- ../lib/Analysis/InlineCost.cpp +++ ../lib/Analysis/InlineCost.cpp @@ -372,7 +372,7 @@ /// Returns false if unable to compute the offset for any reason. Respects any /// simplified values known during the analysis of this callsite. 
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - unsigned IntPtrWidth = DL.getPointerTypeSizeInBits(GEP.getType()); + unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType()); assert(IntPtrWidth == Offset.getBitWidth()); for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); @@ -1619,7 +1619,7 @@ return nullptr; unsigned AS = V->getType()->getPointerAddressSpace(); - unsigned IntPtrWidth = DL.getPointerSizeInBits(AS); + unsigned IntPtrWidth = DL.getIndexSizeInBits(AS); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an Index: ../lib/Analysis/InstructionSimplify.cpp =================================================================== --- ../lib/Analysis/InstructionSimplify.cpp +++ ../lib/Analysis/InstructionSimplify.cpp @@ -3762,7 +3762,7 @@ // The following transforms are only safe if the ptrtoint cast // doesn't truncate the pointers. if (Ops[1]->getType()->getScalarSizeInBits() == - Q.DL.getPointerSizeInBits(AS)) { + Q.DL.getIndexSizeInBits(AS)) { auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { if (match(P, m_Zero())) return Constant::getNullValue(GEPTy); @@ -3802,10 +3802,10 @@ if (Q.DL.getTypeAllocSize(LastType) == 1 && all_of(Ops.slice(1).drop_back(1), [](Value *Idx) { return match(Idx, m_Zero()); })) { - unsigned PtrWidth = - Q.DL.getPointerSizeInBits(Ops[0]->getType()->getPointerAddressSpace()); - if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == PtrWidth) { - APInt BasePtrOffset(PtrWidth, 0); + unsigned IdxWidth = + Q.DL.getIndexSizeInBits(Ops[0]->getType()->getPointerAddressSpace()); + if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == IdxWidth) { + APInt BasePtrOffset(IdxWidth, 0); Value *StrippedBasePtr = Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL, BasePtrOffset); Index: ../lib/Analysis/Loads.cpp =================================================================== --- ../lib/Analysis/Loads.cpp +++ ../lib/Analysis/Loads.cpp @@ -80,7 +80,7 @@ if (const GEPOperator *GEP = dyn_cast(V)) { const Value *Base = GEP->getPointerOperand(); - APInt Offset(DL.getPointerTypeSizeInBits(GEP->getType()), 0); + APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() || !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue()) return false; @@ -146,7 +146,7 @@ SmallPtrSet Visited; return ::isDereferenceableAndAlignedPointer( - V, Align, APInt(DL.getTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL, + V, Align, APInt(DL.getIndexTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL, CtxI, DT, Visited); } Index: ../lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- ../lib/Analysis/LoopAccessAnalysis.cpp +++ ../lib/Analysis/LoopAccessAnalysis.cpp @@ -1127,11 +1127,11 @@ if (CheckType && PtrA->getType() != PtrB->getType()) return false; - unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); + unsigned IdxWidth = DL.getIndexSizeInBits(ASA); Type *Ty = cast(PtrA->getType())->getElementType(); - APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty)); + APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); - APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); + APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); Index: ../lib/Analysis/ScalarEvolution.cpp 
=================================================================== --- ../lib/Analysis/ScalarEvolution.cpp +++ ../lib/Analysis/ScalarEvolution.cpp @@ -3672,6 +3672,8 @@ /// return true. uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); + if (Ty->isPointerTy()) + return getDataLayout().getIndexTypeSizeInBits(Ty); return getDataLayout().getTypeSizeInBits(Ty); } Index: ../lib/Analysis/ValueTracking.cpp =================================================================== --- ../lib/Analysis/ValueTracking.cpp +++ ../lib/Analysis/ValueTracking.cpp @@ -89,7 +89,7 @@ if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; - return DL.getPointerTypeSizeInBits(Ty); + return DL.getIndexTypeSizeInBits(Ty); } namespace { @@ -1101,7 +1101,10 @@ unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType()); + Type *ScalarTy = SrcTy->getScalarType(); + SrcBitWidth = ScalarTy->isPointerTy() ? + Q.DL.getIndexTypeSizeInBits(ScalarTy) : + Q.DL.getTypeSizeInBits(ScalarTy); assert(SrcBitWidth && "SrcBitWidth can't be zero"); Known = Known.zextOrTrunc(SrcBitWidth); @@ -1555,9 +1558,13 @@ assert((V->getType()->isIntOrIntVectorTy(BitWidth) || V->getType()->isPtrOrPtrVectorTy()) && "Not integer or pointer type!"); - assert(Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth && - "V and Known should have same BitWidth"); + + Type *ScalarTy = V->getType()->getScalarType(); + unsigned ExpectedWidth = ScalarTy->isPointerTy() ? + Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); + assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth"); (void)BitWidth; + (void)ExpectedWidth; const APInt *C; if (match(V, m_APInt(C))) { @@ -2194,7 +2201,11 @@ // in V, so for undef we have to conservatively return 1. We don't have the // same behavior for poison though -- that's a FIXME today. - unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType()); + Type *ScalarTy = V->getType()->getScalarType(); + unsigned TyBits = ScalarTy->isPointerTy() ? + Q.DL.getIndexTypeSizeInBits(ScalarTy) : + Q.DL.getTypeSizeInBits(ScalarTy); + unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -3091,7 +3102,7 @@ /// pointer plus a constant offset. Return the base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL) { - unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType()); + unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType()); APInt ByteOffset(BitWidth, 0); // We walk up the defs but use a visited set to handle unreachable code. In @@ -3109,7 +3120,7 @@ // means when we construct GEPOffset, we need to use the size // of GEP's pointer type rather than the size of the original // pointer type. - APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0); + APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); if (!GEP->accumulateConstantOffset(DL, GEPOffset)) break; Index: ../lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- ../lib/CodeGen/CodeGenPrepare.cpp +++ ../lib/CodeGen/CodeGenPrepare.cpp @@ -1581,7 +1581,7 @@ // if size - offset meets the size threshold. 
if (!Arg->getType()->isPointerTy()) continue; - APInt Offset(DL->getPointerSizeInBits( + APInt Offset(DL->getIndexSizeInBits( cast(Arg->getType())->getAddressSpace()), 0); Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); Index: ../lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- ../lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ ../lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7988,8 +7988,8 @@ const GlobalValue *GV; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); - KnownBits Known(PtrWidth); + unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); + KnownBits Known(IdxWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; Index: ../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- ../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ ../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3424,10 +3424,9 @@ DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { - MVT PtrTy = - DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); - unsigned PtrSize = PtrTy.getSizeInBits(); - APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType())); + unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); + MVT IdxTy = MVT::getIntegerVT(IdxSize); + APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType())); // If this is a scalar constant or a splat vector of constants, // handle it quickly. @@ -3439,11 +3438,11 @@ if (CI) { if (CI->isZero()) continue; - APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); LLVMContext &Context = *DAG.getContext(); SDValue OffsVal = VectorWidth ? - DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) : - DAG.getConstant(Offs, dl, PtrTy); + DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : + DAG.getConstant(Offs, dl, IdxTy); // In an inbouds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. 
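The call sites updated above (ConstantFolding, InlineCost, Loads, CodeGenPrepare, SelectionDAG) all follow the same convention: the scratch APInt used to accumulate a constant GEP offset is sized by the index width of the pointer, because the assertions in Operator.cpp and Value.cpp below now check exactly that width. A minimal sketch of the pattern; ``baseAndOffset`` is a hypothetical helper for illustration, not an API added by this patch:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Value.h"

    llvm::Value *baseAndOffset(llvm::Value *Ptr, const llvm::DataLayout &DL,
                               llvm::APInt &Offset) {
      // Ptr must have pointer (or vector-of-pointer) type. The APInt is sized
      // by the index width, not the pointer width, or the callee asserts.
      unsigned IdxWidth = DL.getIndexTypeSizeInBits(Ptr->getType());
      Offset = llvm::APInt(IdxWidth, 0);
      // Strips inbounds constant GEPs, adding each constant offset into Offset.
      return Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
    }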
Index: ../lib/IR/DataLayout.cpp =================================================================== --- ../lib/IR/DataLayout.cpp +++ ../lib/IR/DataLayout.cpp @@ -129,13 +129,15 @@ PointerAlignElem PointerAlignElem::get(uint32_t AddressSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth) { + unsigned PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth) { assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!"); PointerAlignElem retval; retval.AddressSpace = AddressSpace; retval.ABIAlign = ABIAlign; retval.PrefAlign = PrefAlign; retval.TypeByteWidth = TypeByteWidth; + retval.IndexWidth = IndexWidth; return retval; } @@ -144,7 +146,8 @@ return (ABIAlign == rhs.ABIAlign && AddressSpace == rhs.AddressSpace && PrefAlign == rhs.PrefAlign - && TypeByteWidth == rhs.TypeByteWidth); + && TypeByteWidth == rhs.TypeByteWidth + && IndexWidth == rhs.IndexWidth); } //===----------------------------------------------------------------------===// @@ -189,7 +192,7 @@ setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, E.PrefAlign, E.TypeBitWidth); } - setPointerAlignment(0, 8, 8, 8); + setPointerAlignment(0, 8, 8, 8, 8); parseSpecifier(Desc); } @@ -287,6 +290,10 @@ report_fatal_error( "Pointer ABI alignment must be a power of 2"); + // Size of index used in GEP for address calculation. + // The parameter is optional. By default it is equal to size of pointer. + unsigned IndexSize = PointerMemSize; + // Preferred alignment. unsigned PointerPrefAlign = PointerABIAlign; if (!Rest.empty()) { @@ -295,10 +302,17 @@ if (!isPowerOf2_64(PointerPrefAlign)) report_fatal_error( "Pointer preferred alignment must be a power of 2"); - } + // Now read the index. It is the second optional parameter here. + if (!Rest.empty()) { + Split = split(Rest, ':'); + IndexSize = inBytes(getInt(Tok)); + if (!IndexSize) + report_fatal_error("Invalid index size of 0 bytes"); + } + } setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign, - PointerMemSize); + PointerMemSize, IndexSize); break; } case 'i': @@ -467,8 +481,8 @@ } void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, - unsigned PrefAlign, - uint32_t TypeByteWidth) { + unsigned PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth) { if (PrefAlign < ABIAlign) report_fatal_error( "Preferred alignment cannot be less than the ABI alignment"); @@ -476,11 +490,12 @@ PointersTy::iterator I = findPointerLowerBound(AddrSpace); if (I == Pointers.end() || I->AddressSpace != AddrSpace) { Pointers.insert(I, PointerAlignElem::get(AddrSpace, ABIAlign, PrefAlign, - TypeByteWidth)); + TypeByteWidth, IndexWidth)); } else { I->ABIAlign = ABIAlign; I->PrefAlign = PrefAlign; I->TypeByteWidth = TypeByteWidth; + I->IndexWidth = IndexWidth; } } @@ -618,6 +633,22 @@ return getPointerSizeInBits(cast(Ty)->getAddressSpace()); } +unsigned DataLayout::getIndexSize(unsigned AS) const { + PointersTy::const_iterator I = findPointerLowerBound(AS); + if (I == Pointers.end() || I->AddressSpace != AS) { + I = findPointerLowerBound(0); + assert(I->AddressSpace == 0); + } + return I->IndexWidth; +} + +unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const { + assert(Ty->isPtrOrPtrVectorTy() && + "This should only be called with a pointer or pointer vector type"); + Ty = Ty->getScalarType(); + return getIndexSizeInBits(cast(Ty)->getAddressSpace()); +} + /*! \param abi_or_pref Flag that determines which alignment is returned. true returns the ABI alignment, false returns the preferred alignment. 
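The parser above treats the fourth "p" field as optional: when it is omitted the index width defaults to the pointer width, and an address space without its own entry falls back to address space 0. A small sketch of that behaviour, assuming a build containing this change (the layout string is only illustrative):

    // Sketch only: byte-based getters and defaulting of the index field.
    #include "llvm/IR/DataLayout.h"
    #include <cassert>

    void indexDefaults() {
      // AS 0: 40-bit pointers with 32-bit indices.
      // AS 2: 64-bit pointers with no explicit index width.
      llvm::DataLayout DL("e-p:40:64:64:32-p2:64:64:64");

      assert(DL.getPointerSize(0) == 5 && DL.getIndexSize(0) == 4); // bytes
      // Omitted index width defaults to the pointer width.
      assert(DL.getPointerSize(2) == 8 && DL.getIndexSize(2) == 8);
      // An address space with no entry of its own falls back to AS 0.
      assert(DL.getIndexSize(7) == 4);
    }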
@@ -701,13 +732,13 @@ IntegerType *DataLayout::getIntPtrType(LLVMContext &C, unsigned AddressSpace) const { - return IntegerType::get(C, getPointerSizeInBits(AddressSpace)); + return IntegerType::get(C, getIndexSizeInBits(AddressSpace)); } Type *DataLayout::getIntPtrType(Type *Ty) const { assert(Ty->isPtrOrPtrVectorTy() && "Expected a pointer or pointer vector type."); - unsigned NumBits = getPointerTypeSizeInBits(Ty); + unsigned NumBits = getIndexTypeSizeInBits(Ty); IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits); if (VectorType *VecTy = dyn_cast(Ty)) return VectorType::get(IntTy, VecTy->getNumElements()); @@ -726,6 +757,16 @@ return Max != LegalIntWidths.end() ? *Max : 0; } +Type *DataLayout::getIndexType(Type *Ty) const { + assert(Ty->isPtrOrPtrVectorTy() && + "Expected a pointer or pointer vector type."); + unsigned NumBits = getIndexTypeSizeInBits(Ty); + IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits); + if (VectorType *VecTy = dyn_cast(Ty)) + return VectorType::get(IntTy, VecTy->getNumElements()); + return IntTy; +} + int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy, ArrayRef Indices) const { int64_t Result = 0; Index: ../lib/IR/Operator.cpp =================================================================== --- ../lib/IR/Operator.cpp +++ ../lib/IR/Operator.cpp @@ -35,8 +35,8 @@ bool GEPOperator::accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const { assert(Offset.getBitWidth() == - DL.getPointerSizeInBits(getPointerAddressSpace()) && - "The offset must have exactly as many bits as our pointer."); + DL.getIndexSizeInBits(getPointerAddressSpace()) && + "The offset bit width does not match DL specification."); for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this); GTI != GTE; ++GTI) { Index: ../lib/IR/Value.cpp =================================================================== --- ../lib/IR/Value.cpp +++ ../lib/IR/Value.cpp @@ -587,9 +587,9 @@ if (!getType()->isPointerTy()) return this; - assert(Offset.getBitWidth() == DL.getPointerSizeInBits(cast( + assert(Offset.getBitWidth() == DL.getIndexSizeInBits(cast( getType())->getAddressSpace()) && - "The offset must have exactly as many bits as our pointer."); + "The offset bit width does not match the DL specification."); // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. Index: ../lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- ../lib/Transforms/InstCombine/InstCombineCasts.cpp +++ ../lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1761,7 +1761,7 @@ Type *Ty = CI.getType(); unsigned AS = CI.getPointerAddressSpace(); - if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS)) + if (Ty->getScalarSizeInBits() == DL.getIndexSizeInBits(AS)) return commonPointerCastTransforms(CI); Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS); @@ -2014,13 +2014,13 @@ !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) || !BO->isBitwiseLogicOp()) return nullptr; - + // FIXME: This transform is restricted to vector types to avoid backend // problems caused by creating potentially illegal operations. If a fix-up is // added to handle that situation, we can remove this check. 
if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy()) return nullptr; - + Value *X; if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy && !isa(X)) { Index: ../lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- ../lib/Transforms/InstCombine/InstCombineCompares.cpp +++ ../lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -682,7 +682,7 @@ // 4. Emit GEPs to get the original pointers. // 5. Remove the original instructions. Type *IndexType = IntegerType::get( - Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType())); + Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType())); DenseMap NewInsts; NewInsts[Base] = ConstantInt::getNullValue(IndexType); @@ -790,7 +790,7 @@ static std::pair getAsConstantIndexedAddress(Value *V, const DataLayout &DL) { Type *IndexType = IntegerType::get(V->getContext(), - DL.getPointerTypeSizeInBits(V->getType())); + DL.getIndexTypeSizeInBits(V->getType())); Constant *Index = ConstantInt::getNullValue(IndexType); while (true) { @@ -4031,7 +4031,7 @@ // Get scalar or pointer size. unsigned BitWidth = Ty->isIntOrIntVectorTy() ? Ty->getScalarSizeInBits() - : DL.getTypeSizeInBits(Ty->getScalarType()); + : DL.getIndexTypeSizeInBits(Ty->getScalarType()); if (!BitWidth) return nullptr; Index: ../lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- ../lib/Transforms/InstCombine/InstructionCombining.cpp +++ ../lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1115,7 +1115,7 @@ // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - Type *IntPtrTy = DL.getIntPtrType(PtrTy); + Type *IndexTy = DL.getIndexType(PtrTy); int64_t FirstIdx = 0; if (int64_t TySize = DL.getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -1130,7 +1130,7 @@ assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); } - NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); + NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx)); // Index into the types. If we fail, set OrigBase to null. while (Offset) { @@ -1152,7 +1152,7 @@ } else if (ArrayType *AT = dyn_cast(Ty)) { uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); - NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); + NewIndices.push_back(ConstantInt::get(IndexTy,Offset/EltSize)); Offset %= EltSize; Ty = AT->getElementType(); } else { @@ -1515,8 +1515,11 @@ // Eliminate unneeded casts for indices, and replace indices which displace // by multiples of a zero size type with zero. bool MadeChange = false; - Type *IntPtrTy = - DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType()); + + // Index width may not be the same width as pointer width. + // Data layout chooses the right type based on supported integer types. + Type *NewScalarIndexTy = + DL.getIndexType(GEP.getPointerOperandType()->getScalarType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; @@ -1525,10 +1528,11 @@ if (GTI.isStruct()) continue; - // Index type should have the same width as IntPtr Type *IndexTy = (*I)->getType(); - Type *NewIndexType = IndexTy->isVectorTy() ? 
- VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy; + Type *NewIndexType = + IndexTy->isVectorTy() + ? VectorType::get(NewScalarIndexTy, IndexTy->getVectorNumElements()) + : NewScalarIndexTy; // If the element type has zero size then any index over it is equivalent // to an index of zero, so replace it with zero if it is not zero already. @@ -1731,7 +1735,7 @@ if (GEP.getNumIndices() == 1) { unsigned AS = GEP.getPointerAddressSpace(); if (GEP.getOperand(1)->getType()->getScalarSizeInBits() == - DL.getPointerSizeInBits(AS)) { + DL.getIndexSizeInBits(AS)) { Type *Ty = GEP.getSourceElementType(); uint64_t TyAllocSize = DL.getTypeAllocSize(Ty); @@ -1857,7 +1861,7 @@ if (SrcElTy->isArrayTy() && DL.getTypeAllocSize(SrcElTy->getArrayElementType()) == DL.getTypeAllocSize(ResElTy)) { - Type *IdxType = DL.getIntPtrType(GEP.getType()); + Type *IdxType = DL.getIndexType(GEP.getType()); Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() @@ -1884,10 +1888,11 @@ unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); uint64_t Scale = SrcSize / ResSize; - // Earlier transforms ensure that the index has type IntPtrType, which - // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) && - "Index not cast to pointer width?"); + // Earlier transforms ensure that the index has the right type + // according to Data Layout, which considerably simplifies the + // logic by eliminating implicit casts. + assert(Idx->getType() == DL.getIndexType(GEP.getType()) && + "Index type does not match the Data Layout preferences"); bool NSW; if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) { @@ -1923,19 +1928,19 @@ unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); uint64_t Scale = ArrayEltSize / ResSize; - // Earlier transforms ensure that the index has type IntPtrType, which - // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) && - "Index not cast to pointer width?"); + // Earlier transforms ensure that the index has the right type + // according to the Data Layout, which considerably simplifies + // the logic by eliminating implicit casts. + assert(Idx->getType() == DL.getIndexType(GEP.getType()) && + "Index type does not match the Data Layout preferences"); bool NSW; if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) { // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". - Value *Off[2] = { - Constant::getNullValue(DL.getIntPtrType(GEP.getType())), - NewIdx}; + Type *IndTy = DL.getIndexType(GEP.getType()); + Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx}; Value *NewGEP = GEP.isInBounds() && NSW ? 
Builder.CreateInBoundsGEP( @@ -1971,7 +1976,7 @@ if (BitCastInst *BCI = dyn_cast(PtrOp)) { Value *Operand = BCI->getOperand(0); PointerType *OpType = cast(Operand->getType()); - unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType()); + unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType()); APInt Offset(OffsetBits, 0); if (!isa(Operand) && GEP.accumulateConstantOffset(DL, Offset)) { @@ -2020,16 +2025,16 @@ } if (!GEP.isInBounds()) { - unsigned PtrWidth = - DL.getPointerSizeInBits(PtrOp->getType()->getPointerAddressSpace()); - APInt BasePtrOffset(PtrWidth, 0); + unsigned IdxWidth = + DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace()); + APInt BasePtrOffset(IdxWidth, 0); Value *UnderlyingPtrOp = PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset); if (auto *AI = dyn_cast(UnderlyingPtrOp)) { if (GEP.accumulateConstantOffset(DL, BasePtrOffset) && BasePtrOffset.isNonNegative()) { - APInt AllocSize(PtrWidth, DL.getTypeAllocSize(AI->getAllocatedType())); + APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType())); if (BasePtrOffset.ule(AllocSize)) { return GetElementPtrInst::CreateInBounds( PtrOp, makeArrayRef(Ops).slice(1), GEP.getName()); Index: ../lib/Transforms/Scalar/SROA.cpp =================================================================== --- ../lib/Transforms/Scalar/SROA.cpp +++ ../lib/Transforms/Scalar/SROA.cpp @@ -3648,7 +3648,7 @@ auto *PartPtrTy = PartTy->getPointerTo(AS); LoadInst *PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, BasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); @@ -3704,7 +3704,7 @@ StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), PartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); @@ -3786,7 +3786,7 @@ auto AS = LI->getPointerAddressSpace(); PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); @@ -3798,7 +3798,7 @@ StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), StorePartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); Index: ../lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp =================================================================== --- ../lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ ../lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1295,7 +1295,7 @@ // We changed p+o+c to p+c+o, p+c may not be inbound anymore. 
const DataLayout &DAL = First->getModule()->getDataLayout(); - APInt Offset(DAL.getPointerSizeInBits( + APInt Offset(DAL.getIndexSizeInBits( cast(First->getType())->getAddressSpace()), 0); Value *NewBase = Index: ../lib/Transforms/Utils/Local.cpp =================================================================== --- ../lib/Transforms/Utils/Local.cpp +++ ../lib/Transforms/Utils/Local.cpp @@ -1527,7 +1527,7 @@ } } else if (auto *GEP = dyn_cast(&I)) { unsigned BitWidth = - M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); + M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace()); // Rewrite a constant GEP into a DIExpression. Since we are performing // arithmetic to compute the variable's *value* in the DIExpression, we // need to mark the expression with a DW_OP_stack_value. @@ -2123,7 +2123,7 @@ if (!NewTy->isPointerTy()) return; - unsigned BitWidth = DL.getTypeSizeInBits(NewTy); + unsigned BitWidth = DL.getIndexTypeSizeInBits(NewTy); if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { MDNode *NN = MDNode::get(OldLI.getContext(), None); NewLI.setMetadata(LLVMContext::MD_nonnull, NN); Index: ../lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- ../lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ ../lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -323,7 +323,8 @@ APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy)); - APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); + unsigned IdxWidth = DL.getIndexSizeInBits(ASA); + APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); Index: ../test/Transforms/InstCombine/gep-custom-dl.ll =================================================================== --- ../test/Transforms/InstCombine/gep-custom-dl.ll +++ ../test/Transforms/InstCombine/gep-custom-dl.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-m:m-p:40:64:64:32-i32:32-i16:16-i8:8-n32" + +%struct.B = type { double } +%struct.A = type { %struct.B, i32, i32 } +%struct.C = type { [7 x i8] } + + +@Global = constant [10 x i8] c"helloworld" + + +; Test that two array indexing geps fold +define i32* @test1(i32* %I) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[B:%.*]] = getelementptr i32, i32* [[I:%.*]], i32 21 +; CHECK-NEXT: ret i32* [[B]] +; + %A = getelementptr i32, i32* %I, i8 17 + %B = getelementptr i32, i32* %A, i16 4 + ret i32* %B +} + +; Test that two getelementptr insts fold +define i32* @test2({ i32 }* %I) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[B:%.*]] = getelementptr { i32 }, { i32 }* [[I:%.*]], i32 1, i32 0 +; CHECK-NEXT: ret i32* [[B]] +; + %A = getelementptr { i32 }, { i32 }* %I, i32 1 + %B = getelementptr { i32 }, { i32 }* %A, i32 0, i32 0 + ret i32* %B +} + +define void @test3(i8 %B) { +; This should be turned into a constexpr instead of being an instruction +; CHECK-LABEL: @test3( +; CHECK-NEXT: store i8 [[B:%.*]], i8* getelementptr inbounds ([10 x i8], [10 x i8]* @Global, i32 0, i32 4), align 1 +; CHECK-NEXT: ret void +; + %A = getelementptr [10 x i8], [10 x i8]* @Global, i32 0, i32 4 + store i8 %B, i8* %A + ret void +} + +%as1_ptr_struct = type { i32 addrspace(1)* } +%as2_ptr_struct = type { i32 addrspace(2)* } + +@global_as2 = addrspace(2) global i32 zeroinitializer +@global_as1_as2_ptr = addrspace(1) 
global %as2_ptr_struct { i32 addrspace(2)* @global_as2 } + +; This should be turned into a constexpr instead of being an instruction +define void @test_evaluate_gep_nested_as_ptrs(i32 addrspace(2)* %B) { +; CHECK-LABEL: @test_evaluate_gep_nested_as_ptrs( +; CHECK-NEXT: store i32 addrspace(2)* [[B:%.*]], i32 addrspace(2)* addrspace(1)* getelementptr inbounds (%as2_ptr_struct, [[AS2_PTR_STRUCT:%.*]] addrspace(1)* @global_as1_as2_ptr, i32 0, i32 0), align 8 +; CHECK-NEXT: ret void +; + %A = getelementptr %as2_ptr_struct, %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i32 0, i32 0 + store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* %A + ret void +} + +@arst = addrspace(1) global [4 x i8 addrspace(2)*] zeroinitializer + +define void @test_evaluate_gep_as_ptrs_array(i8 addrspace(2)* %B) { +; CHECK-LABEL: @test_evaluate_gep_as_ptrs_array( +; CHECK-NEXT: store i8 addrspace(2)* [[B:%.*]], i8 addrspace(2)* addrspace(1)* getelementptr inbounds ([4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i32 0, i32 2), align 16 +; CHECK-NEXT: ret void +; + + %A = getelementptr [4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2 + store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* %A + ret void +} + +define i32* @test4(i32* %I, i32 %C, i32 %D) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[A:%.*]] = getelementptr i32, i32* [[I:%.*]], i32 [[C:%.*]] +; CHECK-NEXT: [[B:%.*]] = getelementptr i32, i32* [[A]], i32 [[D:%.*]] +; CHECK-NEXT: ret i32* [[B]] +; + %A = getelementptr i32, i32* %I, i32 %C + %B = getelementptr i32, i32* %A, i32 %D + ret i32* %B +} + + +define i1 @test5({ i32, i32 }* %x, { i32, i32 }* %y) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: [[TMP_4:%.*]] = icmp eq { i32, i32 }* [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[TMP_4]] +; + %tmp.1 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1 + %tmp.3 = getelementptr { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1 + ;; seteq x, y + %tmp.4 = icmp eq i32* %tmp.1, %tmp.3 + ret i1 %tmp.4 +} + +%S = type { i32, [ 100 x i32] } + +define <2 x i1> @test6(<2 x i32> %X, <2 x %S*> %P) nounwind { +; CHECK-LABEL: @test6( +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = getelementptr inbounds %S, <2 x %S*> %P, <2 x i32> zeroinitializer, <2 x i32> , <2 x i32> %X + %B = getelementptr inbounds %S, <2 x %S*> %P, <2 x i32> , <2 x i32> + %C = icmp eq <2 x i32*> %A, %B + ret <2 x i1> %C +} + +@G = external global [3 x i8] +define i8* @test7(i16 %Idx) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[ZE_IDX:%.*]] = zext i16 [[IDX:%.*]] to i32 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [3 x i8], [3 x i8]* @G, i32 0, i32 [[ZE_IDX]] +; CHECK-NEXT: ret i8* [[TMP]] +; + %ZE_Idx = zext i16 %Idx to i32 + %tmp = getelementptr i8, i8* getelementptr ([3 x i8], [3 x i8]* @G, i32 0, i32 0), i32 %ZE_Idx + ret i8* %tmp +} + + +; Test folding of constantexpr geps into normal geps. 
+@Array = external global [40 x i32] +define i32 *@test8(i32 %X) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: [[A:%.*]] = getelementptr [40 x i32], [40 x i32]* @Array, i32 0, i32 [[X:%.*]] +; CHECK-NEXT: ret i32* [[A]] +; + %A = getelementptr i32, i32* getelementptr ([40 x i32], [40 x i32]* @Array, i32 0, i32 0), i32 %X + ret i32* %A +} + +define i32 *@test9(i32 *%base, i8 %ind) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[IND:%.*]] to i32 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[TMP1]] +; CHECK-NEXT: ret i32* [[RES]] +; + %res = getelementptr i32, i32 *%base, i8 %ind + ret i32* %res +} + +define i32 @test10() { +; CHECK-LABEL: @test10( +; CHECK-NEXT: ret i32 8 +; + %A = getelementptr { i32, double }, { i32, double }* null, i32 0, i32 1 + %B = ptrtoint double* %A to i32 + ret i32 %B +} Index: ../test/Transforms/InstCombine/icmp-custom-dl.ll =================================================================== --- ../test/Transforms/InstCombine/icmp-custom-dl.ll +++ ../test/Transforms/InstCombine/icmp-custom-dl.ll @@ -0,0 +1,247 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:40:64:64:32-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +declare i32 @test58_d(i64 ) + +define i1 @test59(i8* %foo) { +; CHECK-LABEL: @test59( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, i8* [[FOO:%.*]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8* [[GEP1]] to i32 +; CHECK-NEXT: [[USE:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]]) +; CHECK-NEXT: ret i1 true +; + %bit = bitcast i8* %foo to i32* + %gep1 = getelementptr inbounds i32, i32* %bit, i64 2 + %gep2 = getelementptr inbounds i8, i8* %foo, i64 10 + %cast1 = bitcast i32* %gep1 to i8* + %cmp = icmp ult i8* %cast1, %gep2 + %use = ptrtoint i8* %cast1 to i64 + %call = call i32 @test58_d(i64 %use) + ret i1 %cmp +} + +define i1 @test59_as1(i8 addrspace(1)* %foo) { +; CHECK-LABEL: @test59_as1( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[FOO:%.*]], i16 8 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[GEP1]] to i16 +; CHECK-NEXT: [[USE:%.*]] = zext i16 [[TMP1]] to i64 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]]) +; CHECK-NEXT: ret i1 true +; + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i64 2 + %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i64 10 + %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + %use = ptrtoint i8 addrspace(1)* %cast1 to i64 + %call = call i32 @test58_d(i64 %use) + ret i1 %cmp +} + +define i1 @test60(i8* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test60( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[J:%.*]] to i32 +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[GEP1_IDX]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %bit = bitcast i8* %foo to i32* + %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i + %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j + %cast1 = bitcast i32* %gep1 to i8* + %cmp = icmp ult i8* %cast1, %gep2 + ret i1 %cmp +} + +define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) { +; 
CHECK-LABEL: @test60_as1( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[J:%.*]] to i16 +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i64 %i + %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i64 %j + %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + ret i1 %cmp +} + +; Same as test60, but look through an addrspacecast instead of a +; bitcast. This uses the same sized addrspace. +define i1 @test60_addrspacecast(i8* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test60_addrspacecast( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[J:%.*]] to i32 +; CHECK-NEXT: [[I_TR:%.*]] = trunc i64 [[I:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[I_TR]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %bit = addrspacecast i8* %foo to i32 addrspace(3)* + %gep1 = getelementptr inbounds i32, i32 addrspace(3)* %bit, i64 %i + %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j + %cast1 = addrspacecast i32 addrspace(3)* %gep1 to i8* + %cmp = icmp ult i8* %cast1, %gep2 + ret i1 %cmp +} + +define i1 @test60_addrspacecast_smaller(i8* %foo, i16 %i, i64 %j) { +; CHECK-LABEL: @test60_addrspacecast_smaller( +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[I:%.*]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[J:%.*]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %bit = addrspacecast i8* %foo to i32 addrspace(1)* + %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i + %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j + %cast1 = addrspacecast i32 addrspace(1)* %gep1 to i8* + %cmp = icmp ult i8* %cast1, %gep2 + ret i1 %cmp +} + +define i1 @test60_addrspacecast_larger(i8 addrspace(1)* %foo, i32 %i, i16 %j) { +; CHECK-LABEL: @test60_addrspacecast_larger( +; CHECK-NEXT: [[I_TR:%.*]] = trunc i32 [[I:%.*]] to i16 +; CHECK-NEXT: [[TMP1:%.*]] = shl i16 [[I_TR]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[TMP1]], [[J:%.*]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %bit = addrspacecast i8 addrspace(1)* %foo to i32 addrspace(2)* + %gep1 = getelementptr inbounds i32, i32 addrspace(2)* %bit, i32 %i + %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i16 %j + %cast1 = addrspacecast i32 addrspace(2)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + ret i1 %cmp +} + +define i1 @test61(i8* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test61( +; CHECK-NEXT: [[BIT:%.*]] = bitcast i8* [[FOO:%.*]] to i32* +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i32 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32* [[BIT]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[J:%.*]] to i32 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8* [[FOO]], i32 [[TMP2]] +; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32* [[GEP1]] to i8* +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[GEP2]], [[CAST1]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %bit = bitcast i8* %foo to i32* + %gep1 = getelementptr i32, i32* %bit, i64 %i + %gep2 = getelementptr i8, i8* %foo, i64 %j + %cast1 = bitcast i32* %gep1 to i8* + %cmp = icmp ult i8* %cast1, %gep2 + ret i1 %cmp +; Don't transform non-inbounds GEPs. 
+} + +define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { +; CHECK-LABEL: @test61_as1( +; CHECK-NEXT: [[BIT:%.*]] = bitcast i8 addrspace(1)* [[FOO:%.*]] to i32 addrspace(1)* +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32 addrspace(1)* [[BIT]], i16 [[I:%.*]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8 addrspace(1)* [[FOO]], i16 [[J:%.*]] +; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 addrspace(1)* [[GEP1]] to i8 addrspace(1)* +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 addrspace(1)* [[GEP2]], [[CAST1]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr i32, i32 addrspace(1)* %bit, i16 %i + %gep2 = getelementptr i8, i8 addrspace(1)* %foo, i16 %j + %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + ret i1 %cmp +; Don't transform non-inbounds GEPs. +} + +define i1 @test62(i8* %a) { +; CHECK-LABEL: @test62( +; CHECK-NEXT: ret i1 true +; + %arrayidx1 = getelementptr inbounds i8, i8* %a, i64 1 + %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 10 + %cmp = icmp slt i8* %arrayidx1, %arrayidx2 + ret i1 %cmp +} + +define i1 @test62_as1(i8 addrspace(1)* %a) { +; CHECK-LABEL: @test62_as1( +; CHECK-NEXT: ret i1 true +; + %arrayidx1 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 1 + %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 10 + %cmp = icmp slt i8 addrspace(1)* %arrayidx1, %arrayidx2 + ret i1 %cmp +} + + +; Variation of the above with an ashr +define i1 @icmp_and_ashr_multiuse(i32 %X) { +; CHECK-LABEL: @icmp_and_ashr_multiuse( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 240 +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[X]], 496 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 224 +; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i32 [[AND2]], 432 +; CHECK-NEXT: [[AND3:%.*]] = and i1 [[TOBOOL]], [[TOBOOL2]] +; CHECK-NEXT: ret i1 [[AND3]] +; + %shr = ashr i32 %X, 4 + %and = and i32 %shr, 15 + %and2 = and i32 %shr, 31 ; second use of the shift + %tobool = icmp ne i32 %and, 14 + %tobool2 = icmp ne i32 %and2, 27 + %and3 = and i1 %tobool, %tobool2 + ret i1 %and3 +} + +define i1 @icmp_lshr_and_overshift(i8 %X) { +; CHECK-LABEL: @icmp_lshr_and_overshift( +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ugt i8 [[X:%.*]], 31 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; + %shr = lshr i8 %X, 5 + %and = and i8 %shr, 15 + %tobool = icmp ne i8 %and, 0 + ret i1 %tobool +} + +; We shouldn't simplify this because the and uses bits that are shifted in. 
+define i1 @icmp_ashr_and_overshift(i8 %X) { +; CHECK-LABEL: @icmp_ashr_and_overshift( +; CHECK-NEXT: [[SHR:%.*]] = ashr i8 [[X:%.*]], 5 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[SHR]], 15 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[AND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; + %shr = ashr i8 %X, 5 + %and = and i8 %shr, 15 + %tobool = icmp ne i8 %and, 0 + ret i1 %tobool +} + +; PR16244 +define i1 @test71(i8* %x) { +; CHECK-LABEL: @test71( +; CHECK-NEXT: ret i1 false +; + %a = getelementptr i8, i8* %x, i64 8 + %b = getelementptr inbounds i8, i8* %x, i64 8 + %c = icmp ugt i8* %a, %b + ret i1 %c +} + +define i1 @test71_as1(i8 addrspace(1)* %x) { +; CHECK-LABEL: @test71_as1( +; CHECK-NEXT: ret i1 false +; + %a = getelementptr i8, i8 addrspace(1)* %x, i64 8 + %b = getelementptr inbounds i8, i8 addrspace(1)* %x, i64 8 + %c = icmp ugt i8 addrspace(1)* %a, %b + ret i1 %c +} + Index: ../test/Transforms/LoopIdiom/struct-custom-dl.ll =================================================================== --- ../test/Transforms/LoopIdiom/struct-custom-dl.ll +++ ../test/Transforms/LoopIdiom/struct-custom-dl.ll @@ -0,0 +1,212 @@ +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-p:40:64:64:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +%struct.foo = type { i32, i32 } +%struct.foo1 = type { i32, i32, i32 } +%struct.foo2 = type { i32, i16, i16 } + +;void bar1(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 0; +; } +;} +define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar1( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} + +;void bar2(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].b = 0; +; f[i].a = 0; +; } +;} +define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar2( +; CHECK: call void @llvm.memset +; 
CHECK-NOT: store +} + +;void bar3(foo_t *f, unsigned n) { +; for (unsigned i = n; i > 0; --i) { +; f[i].a = 0; +; f[i].b = 0; +; } +;} +define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ %n, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %dec = add i32 %indvars.iv, -1 + %cmp = icmp eq i32 %dec, 0 + %indvars.iv.next = add nsw i32 %indvars.iv, -1 + br i1 %cmp, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar3( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} + +;void bar4(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 1; +; } +;} +define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 1 + store i32 1, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar4( +; CHECK-NOT: call void @llvm.memset +} + +;void bar5(foo1_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 0; +; } +;} +define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i32 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar5( +; CHECK-NOT: call void @llvm.memset +} + +;void bar6(foo2_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 0; +; f[i].c = 0; +; } +;} +define void @bar6(%struct.foo2* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + 
+for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i32 %indvars.iv, i32 1 + store i16 0, i16* %b, align 4 + %c = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i32 %indvars.iv, i32 2 + store i16 0, i16* %c, align 2 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar6( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} Index: ../test/Transforms/LoopIdiom/unroll-custom-dl.ll =================================================================== --- ../test/Transforms/LoopIdiom/unroll-custom-dl.ll +++ ../test/Transforms/LoopIdiom/unroll-custom-dl.ll @@ -0,0 +1,78 @@ +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-p:64:64:64:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 + +target triple = "x86_64-apple-darwin10.0.0" + +;void test(int *f, unsigned n) { +; for (unsigned i = 0; i < 2 * n; i += 2) { +; f[i] = 0; +; f[i+1] = 0; +; } +;} +define void @test(i32* %f, i32 %n) nounwind ssp { +entry: + %0 = shl i32 %n, 1 + %cmp1 = icmp eq i32 %0, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %f, i32 %indvars.iv + store i32 0, i32* %arrayidx, align 4 + %1 = or i32 %indvars.iv, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %f, i32 %1 + store i32 0, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 2 + %cmp = icmp ult i32 %indvars.iv.next, %0 + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @test( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} + +;void test_pattern(int *f, unsigned n) { +; for (unsigned i = 0; i < 2 * n; i += 2) { +; f[i] = 2; +; f[i+1] = 2; +; } +;} +define void @test_pattern(i32* %f, i32 %n) nounwind ssp { +entry: + %mul = shl i32 %n, 1 + %cmp1 = icmp eq i32 %mul, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %f, i32 %indvars.iv + store i32 2, i32* %arrayidx, align 4 + %x1 = or i32 %indvars.iv, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %f, i32 %x1 + store i32 2, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 2 + %cmp = icmp ult i32 %indvars.iv.next, %mul + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; 
CHECK-LABEL: @test_pattern( +; CHECK: call void @memset_pattern16 +; CHECK-NOT: store +} Index: ../test/Transforms/PhaseOrdering/scev-custom-dl.ll =================================================================== --- ../test/Transforms/PhaseOrdering/scev-custom-dl.ll +++ ../test/Transforms/PhaseOrdering/scev-custom-dl.ll @@ -0,0 +1,67 @@ +; RUN: opt -O3 -S -analyze -scalar-evolution < %s | FileCheck %s + +target datalayout = "e-m:m-p:40:64:64:32-i32:32-i16:16-i8:8-n32" + +; +; This file contains phase ordering tests for scalar evolution. +; Test that the standard passes don't obfuscate the IR so scalar evolution can't +; recognize expressions. + +; CHECK: test1 +; The loop body contains two increments by %div. +; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d. +; CHECK: --> {%p,+,(8 * (%d /u 4))} +define void @test1(i32 %d, i32* %p) nounwind uwtable ssp { +entry: + %div = udiv i32 %d, 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp ne i32 %i.0, 64 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %p.addr.0, align 4 + %add.ptr = getelementptr inbounds i32, i32* %p.addr.0, i32 %div + store i32 1, i32* %add.ptr, align 4 + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %div + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; CHECK: test1a +; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2) +; CHECK: --> {%p,+,(8 * (%d /u 2))} +define void @test1a(i32 %d, i32* %p) nounwind uwtable ssp { +entry: + %div = udiv i32 %d, 2 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp ne i32 %i.0, 64 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %p.addr.0, align 4 + %add.ptr = getelementptr inbounds i32, i32* %p.addr.0, i32 %div + store i32 1, i32* %add.ptr, align 4 + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %div + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: ../test/Transforms/SimplifyCFG/switch_create-custom-dl.ll =================================================================== --- ../test/Transforms/SimplifyCFG/switch_create-custom-dl.ll +++ ../test/Transforms/SimplifyCFG/switch_create-custom-dl.ll @@ -0,0 +1,660 @@ +; RUN: opt -S -simplifycfg < %s | FileCheck %s +target datalayout="p:40:64:64:32" + +declare void @foo1() + +declare void @foo2() + +define void @test1(i32 %V) { + %C1 = icmp eq i32 %V, 4 ; [#uses=1] + %C2 = icmp eq i32 %V, 17 ; [#uses=1] + %CN = or i1 %C1, %C2 ; [#uses=1] + br i1 %CN, label %T, label %F +T: ; preds = %0 + call void @foo1( ) + ret void +F: ; preds = %0 + call void @foo2( ) + ret void +; CHECK-LABEL: @test1( +; CHECK: switch i32 %V, label %F [ +; CHECK: i32 17, label %T +; CHECK: i32 4, label %T +; CHECK: ] +} + +define void @test1_ptr(i32* %V) { + %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*) + %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*) + %CN = or i1 %C1, %C2 ; [#uses=1] + br i1 %CN, label %T, label %F +T: ; preds = %0 + call void @foo1( ) + ret void +F: ; preds = %0 + call void @foo2( ) + ret void +; 
CHECK-LABEL: @test1_ptr( +; DL: %magicptr = ptrtoint i32* %V to i32 +; DL: switch i32 %magicptr, label %F [ +; DL: i32 17, label %T +; DL: i32 4, label %T +; DL: ] +} + +define void @test1_ptr_as1(i32 addrspace(1)* %V) { + %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*) + %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*) + %CN = or i1 %C1, %C2 ; [#uses=1] + br i1 %CN, label %T, label %F +T: ; preds = %0 + call void @foo1( ) + ret void +F: ; preds = %0 + call void @foo2( ) + ret void +; CHECK-LABEL: @test1_ptr_as1( +; DL: %magicptr = ptrtoint i32 addrspace(1)* %V to i16 +; DL: switch i16 %magicptr, label %F [ +; DL: i16 17, label %T +; DL: i16 4, label %T +; DL: ] +} + +define void @test2(i32 %V) { + %C1 = icmp ne i32 %V, 4 ; [#uses=1] + %C2 = icmp ne i32 %V, 17 ; [#uses=1] + %CN = and i1 %C1, %C2 ; [#uses=1] + br i1 %CN, label %T, label %F +T: ; preds = %0 + call void @foo1( ) + ret void +F: ; preds = %0 + call void @foo2( ) + ret void +; CHECK-LABEL: @test2( +; CHECK: switch i32 %V, label %T [ +; CHECK: i32 17, label %F +; CHECK: i32 4, label %F +; CHECK: ] +} + +define void @test3(i32 %V) { + %C1 = icmp eq i32 %V, 4 ; [#uses=1] + br i1 %C1, label %T, label %N +N: ; preds = %0 + %C2 = icmp eq i32 %V, 17 ; [#uses=1] + br i1 %C2, label %T, label %F +T: ; preds = %N, %0 + call void @foo1( ) + ret void +F: ; preds = %N + call void @foo2( ) + ret void + +; CHECK-LABEL: @test3( +; CHECK: switch i32 %V, label %F [ +; CHECK: i32 4, label %T +; CHECK: i32 17, label %T +; CHECK: ] +} + + + +define i32 @test4(i8 zeroext %c) nounwind ssp noredzone { +entry: + %cmp = icmp eq i8 %c, 62 + br i1 %cmp, label %lor.end, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %cmp4 = icmp eq i8 %c, 34 + br i1 %cmp4, label %lor.end, label %lor.rhs + +lor.rhs: ; preds = %lor.lhs.false + %cmp8 = icmp eq i8 %c, 92 + br label %lor.end + +lor.end: ; preds = %lor.rhs, %lor.lhs.false, %entry + %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp8, %lor.rhs ] + %lor.ext = zext i1 %0 to i32 + ret i32 %lor.ext + +; CHECK-LABEL: @test4( +; CHECK: switch i8 %c, label %lor.rhs [ +; CHECK: i8 62, label %lor.end +; CHECK: i8 34, label %lor.end +; CHECK: i8 92, label %lor.end +; CHECK: ] +} + +define i32 @test5(i8 zeroext %c) nounwind ssp noredzone { +entry: + switch i8 %c, label %lor.rhs [ + i8 62, label %lor.end + i8 34, label %lor.end + i8 92, label %lor.end + ] + +lor.rhs: ; preds = %entry + %V = icmp eq i8 %c, 92 + br label %lor.end + +lor.end: ; preds = %entry, %entry, %entry, %lor.rhs + %0 = phi i1 [ true, %entry ], [ %V, %lor.rhs ], [ true, %entry ], [ true, %entry ] + %lor.ext = zext i1 %0 to i32 + ret i32 %lor.ext +; CHECK-LABEL: @test5( +; CHECK: switch i8 %c, label %lor.rhs [ +; CHECK: i8 62, label %lor.end +; CHECK: i8 34, label %lor.end +; CHECK: i8 92, label %lor.end +; CHECK: ] +} + + +define i1 @test6({ i32, i32 }* %I) { +entry: + %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1 ; [#uses=1] + %tmp.2.i = load i32, i32* %tmp.1.i ; [#uses=6] + %tmp.2 = icmp eq i32 %tmp.2.i, 14 ; [#uses=1] + br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0 +shortcirc_next.0: ; preds = %entry + %tmp.6 = icmp eq i32 %tmp.2.i, 15 ; [#uses=1] + br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1 +shortcirc_next.1: ; preds = %shortcirc_next.0 + %tmp.11 = icmp eq i32 %tmp.2.i, 16 ; [#uses=1] + br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2 +shortcirc_next.2: ; preds = %shortcirc_next.1 + %tmp.16 = icmp eq i32 
%tmp.2.i, 17 ; [#uses=1] + br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3 +shortcirc_next.3: ; preds = %shortcirc_next.2 + %tmp.21 = icmp eq i32 %tmp.2.i, 18 ; [#uses=1] + br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4 +shortcirc_next.4: ; preds = %shortcirc_next.3 + %tmp.26 = icmp eq i32 %tmp.2.i, 19 ; [#uses=1] + br label %UnifiedReturnBlock +shortcirc_done.4: ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry + br label %UnifiedReturnBlock +UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4 + %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ] ; [#uses=1] + ret i1 %UnifiedRetVal + +; CHECK-LABEL: @test6( +; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14 +; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6 +} + +define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone { +entry: + %cmp = icmp ult i32 %x, 32 + %cmp4 = icmp eq i8 %c, 97 + %or.cond = or i1 %cmp, %cmp4 + %cmp9 = icmp eq i8 %c, 99 + %or.cond11 = or i1 %or.cond, %cmp9 + br i1 %or.cond11, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo1() nounwind noredzone + ret void + +if.end: ; preds = %entry + ret void + +; CHECK-LABEL: @test7( +; CHECK: %cmp = icmp ult i32 %x, 32 +; CHECK: br i1 %cmp, label %if.then, label %switch.early.test +; CHECK: switch.early.test: +; CHECK: switch i8 %c, label %if.end [ +; CHECK: i8 99, label %if.then +; CHECK: i8 97, label %if.then +; CHECK: ] +} + +define i32 @test8(i8 zeroext %c, i32 %x, i1 %C) nounwind ssp noredzone { +entry: + br i1 %C, label %N, label %if.then +N: + %cmp = icmp ult i32 %x, 32 + %cmp4 = icmp eq i8 %c, 97 + %or.cond = or i1 %cmp, %cmp4 + %cmp9 = icmp eq i8 %c, 99 + %or.cond11 = or i1 %or.cond, %cmp9 + br i1 %or.cond11, label %if.then, label %if.end + +if.then: ; preds = %entry + %A = phi i32 [0, %entry], [42, %N] + tail call void @foo1() nounwind noredzone + ret i32 %A + +if.end: ; preds = %entry + ret i32 0 + +; CHECK-LABEL: @test8( +; CHECK: switch.early.test: +; CHECK: switch i8 %c, label %if.end [ +; CHECK: i8 99, label %if.then +; CHECK: i8 97, label %if.then +; CHECK: ] +; CHECK: %A = phi i32 [ 0, %entry ], [ 42, %switch.early.test ], [ 42, %N ], [ 42, %switch.early.test ] +} + +;; This is "Example 7" from http://blog.regehr.org/archives/320 +define i32 @test9(i8 zeroext %c) nounwind ssp noredzone { +entry: + %cmp = icmp ult i8 %c, 33 + br i1 %cmp, label %lor.end, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %cmp4 = icmp eq i8 %c, 46 + br i1 %cmp4, label %lor.end, label %lor.lhs.false6 + +lor.lhs.false6: ; preds = %lor.lhs.false + %cmp9 = icmp eq i8 %c, 44 + br i1 %cmp9, label %lor.end, label %lor.lhs.false11 + +lor.lhs.false11: ; preds = %lor.lhs.false6 + %cmp14 = icmp eq i8 %c, 58 + br i1 %cmp14, label %lor.end, label %lor.lhs.false16 + +lor.lhs.false16: ; preds = %lor.lhs.false11 + %cmp19 = icmp eq i8 %c, 59 + br i1 %cmp19, label %lor.end, label %lor.lhs.false21 + +lor.lhs.false21: ; preds = %lor.lhs.false16 + %cmp24 = icmp eq i8 %c, 60 + br i1 %cmp24, label %lor.end, label %lor.lhs.false26 + +lor.lhs.false26: ; preds = %lor.lhs.false21 + %cmp29 = icmp eq i8 %c, 62 + br i1 %cmp29, label %lor.end, label %lor.lhs.false31 + +lor.lhs.false31: ; preds = %lor.lhs.false26 + %cmp34 = icmp eq i8 %c, 34 + br i1 %cmp34, label %lor.end, label %lor.lhs.false36 + +lor.lhs.false36: ; preds = %lor.lhs.false31 + %cmp39 = icmp eq i8 %c, 92 + br i1 %cmp39, label %lor.end, label %lor.rhs + +lor.rhs: ; preds = 
%lor.lhs.false36 + %cmp43 = icmp eq i8 %c, 39 + br label %lor.end + +lor.end: ; preds = %lor.rhs, %lor.lhs.false36, %lor.lhs.false31, %lor.lhs.false26, %lor.lhs.false21, %lor.lhs.false16, %lor.lhs.false11, %lor.lhs.false6, %lor.lhs.false, %entry + %0 = phi i1 [ true, %lor.lhs.false36 ], [ true, %lor.lhs.false31 ], [ true, %lor.lhs.false26 ], [ true, %lor.lhs.false21 ], [ true, %lor.lhs.false16 ], [ true, %lor.lhs.false11 ], [ true, %lor.lhs.false6 ], [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp43, %lor.rhs ] + %conv46 = zext i1 %0 to i32 + ret i32 %conv46 + +; CHECK-LABEL: @test9( +; CHECK: %cmp = icmp ult i8 %c, 33 +; CHECK: br i1 %cmp, label %lor.end, label %switch.early.test + +; CHECK: switch.early.test: +; CHECK: switch i8 %c, label %lor.rhs [ +; CHECK: i8 92, label %lor.end +; CHECK: i8 62, label %lor.end +; CHECK: i8 60, label %lor.end +; CHECK: i8 59, label %lor.end +; CHECK: i8 58, label %lor.end +; CHECK: i8 46, label %lor.end +; CHECK: i8 44, label %lor.end +; CHECK: i8 34, label %lor.end +; CHECK: i8 39, label %lor.end +; CHECK: ] +} + +define i32 @test10(i32 %mode, i1 %Cond) { + %A = icmp ne i32 %mode, 0 + %B = icmp ne i32 %mode, 51 + %C = and i1 %A, %B + %D = and i1 %C, %Cond + br i1 %D, label %T, label %F +T: + ret i32 123 +F: + ret i32 324 + +; CHECK-LABEL: @test10( +; CHECK: br i1 %Cond, label %switch.early.test, label %F +; CHECK:switch.early.test: +; CHECK: switch i32 %mode, label %T [ +; CHECK: i32 51, label %F +; CHECK: i32 0, label %F +; CHECK: ] +} + +; PR8780 +define i32 @test11(i32 %bar) nounwind { +entry: + %cmp = icmp eq i32 %bar, 4 + %cmp2 = icmp eq i32 %bar, 35 + %or.cond = or i1 %cmp, %cmp2 + %cmp5 = icmp eq i32 %bar, 53 + %or.cond1 = or i1 %or.cond, %cmp5 + %cmp8 = icmp eq i32 %bar, 24 + %or.cond2 = or i1 %or.cond1, %cmp8 + %cmp11 = icmp eq i32 %bar, 23 + %or.cond3 = or i1 %or.cond2, %cmp11 + %cmp14 = icmp eq i32 %bar, 55 + %or.cond4 = or i1 %or.cond3, %cmp14 + %cmp17 = icmp eq i32 %bar, 12 + %or.cond5 = or i1 %or.cond4, %cmp17 + %cmp20 = icmp eq i32 %bar, 35 + %or.cond6 = or i1 %or.cond5, %cmp20 + br i1 %or.cond6, label %if.then, label %if.end + +if.then: ; preds = %entry + br label %return + +if.end: ; preds = %entry + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ 1, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 + +; CHECK-LABEL: @test11( +; CHECK: switch i32 %bar, label %if.end [ +; CHECK: i32 55, label %return +; CHECK: i32 53, label %return +; CHECK: i32 35, label %return +; CHECK: i32 24, label %return +; CHECK: i32 23, label %return +; CHECK: i32 12, label %return +; CHECK: i32 4, label %return +; CHECK: ] +} + +define void @test12() nounwind { +entry: + br label %bb49.us.us + +bb49.us.us: + %A = icmp eq i32 undef, undef + br i1 %A, label %bb55.us.us, label %malformed + +bb48.us.us: + %B = icmp ugt i32 undef, undef + br i1 %B, label %bb55.us.us, label %bb49.us.us + +bb55.us.us: + br label %bb48.us.us + +malformed: + ret void +; CHECK-LABEL: @test12( + +} + +; test13 - handle switch formation with ult. 
+define void @test13(i32 %x) nounwind ssp noredzone { +entry: + %cmp = icmp ult i32 %x, 2 + br i1 %cmp, label %if.then, label %lor.lhs.false3 + +lor.lhs.false3: ; preds = %lor.lhs.false + %cmp5 = icmp eq i32 %x, 3 + br i1 %cmp5, label %if.then, label %lor.lhs.false6 + +lor.lhs.false6: ; preds = %lor.lhs.false3 + %cmp8 = icmp eq i32 %x, 4 + br i1 %cmp8, label %if.then, label %lor.lhs.false9 + +lor.lhs.false9: ; preds = %lor.lhs.false6 + %cmp11 = icmp eq i32 %x, 6 + br i1 %cmp11, label %if.then, label %if.end + +if.then: ; preds = %lor.lhs.false9, %lor.lhs.false6, %lor.lhs.false3, %lor.lhs.false, %entry + call void @foo1() noredzone + br label %if.end + +if.end: ; preds = %if.then, %lor.lhs.false9 + ret void +; CHECK-LABEL: @test13( +; CHECK: switch i32 %x, label %if.end [ +; CHECK: i32 6, label %if.then +; CHECK: i32 4, label %if.then +; CHECK: i32 3, label %if.then +; CHECK: i32 1, label %if.then +; CHECK: i32 0, label %if.then +; CHECK: ] +} + +; test14 - handle switch formation with ult. +define void @test14(i32 %x) nounwind ssp noredzone { +entry: + %cmp = icmp ugt i32 %x, 2 + br i1 %cmp, label %lor.lhs.false3, label %if.then + +lor.lhs.false3: ; preds = %lor.lhs.false + %cmp5 = icmp ne i32 %x, 3 + br i1 %cmp5, label %lor.lhs.false6, label %if.then + +lor.lhs.false6: ; preds = %lor.lhs.false3 + %cmp8 = icmp ne i32 %x, 4 + br i1 %cmp8, label %lor.lhs.false9, label %if.then + +lor.lhs.false9: ; preds = %lor.lhs.false6 + %cmp11 = icmp ne i32 %x, 6 + br i1 %cmp11, label %if.end, label %if.then + +if.then: ; preds = %lor.lhs.false9, %lor.lhs.false6, %lor.lhs.false3, %lor.lhs.false, %entry + call void @foo1() noredzone + br label %if.end + +if.end: ; preds = %if.then, %lor.lhs.false9 + ret void +; CHECK-LABEL: @test14( +; CHECK: switch i32 %x, label %if.end [ +; CHECK: i32 6, label %if.then +; CHECK: i32 4, label %if.then +; CHECK: i32 3, label %if.then +; CHECK: i32 1, label %if.then +; CHECK: i32 0, label %if.then +; CHECK: ] +} + +; Don't crash on ginormous ranges. +define void @test15(i128 %x) nounwind { + %cmp = icmp ugt i128 %x, 2 + br i1 %cmp, label %if.end, label %lor.false + +lor.false: + %cmp2 = icmp ne i128 %x, 100000000000000000000 + br i1 %cmp2, label %if.end, label %if.then + +if.then: + call void @foo1() noredzone + br label %if.end + +if.end: + ret void + +; CHECK-LABEL: @test15( +; CHECK-NOT: switch +; CHECK: ret void +} + +; PR8675 +; rdar://5134905 +define zeroext i1 @test16(i32 %x) nounwind { +entry: +; CHECK-LABEL: @test16( +; CHECK: %x.off = add i32 %x, -1 +; CHECK: %switch = icmp ult i32 %x.off, 3 + %cmp.i = icmp eq i32 %x, 1 + br i1 %cmp.i, label %lor.end, label %lor.lhs.false + +lor.lhs.false: + %cmp.i2 = icmp eq i32 %x, 2 + br i1 %cmp.i2, label %lor.end, label %lor.rhs + +lor.rhs: + %cmp.i1 = icmp eq i32 %x, 3 + br label %lor.end + +lor.end: + %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp.i1, %lor.rhs ] + ret i1 %0 +} + +; Check that we don't turn an icmp into a switch where it's not useful. 
+define void @test17(i32 %x, i32 %y) { + %cmp = icmp ult i32 %x, 3 + %switch = icmp ult i32 %y, 2 + %or.cond775 = or i1 %cmp, %switch + br i1 %or.cond775, label %lor.lhs.false8, label %return + +lor.lhs.false8: + tail call void @foo1() + ret void + +return: + ret void + +; CHECK-LABEL: @test17( +; CHECK-NOT: switch.early.test +; CHECK-NOT: switch i32 +; CHECK: ret void +} + +define void @test18(i32 %arg) { +bb: + %tmp = and i32 %arg, -2 + %tmp1 = icmp eq i32 %tmp, 8 + %tmp2 = icmp eq i32 %arg, 10 + %tmp3 = or i1 %tmp1, %tmp2 + %tmp4 = icmp eq i32 %arg, 11 + %tmp5 = or i1 %tmp3, %tmp4 + %tmp6 = icmp eq i32 %arg, 12 + %tmp7 = or i1 %tmp5, %tmp6 + br i1 %tmp7, label %bb19, label %bb8 + +bb8: ; preds = %bb + %tmp9 = add i32 %arg, -13 + %tmp10 = icmp ult i32 %tmp9, 2 + %tmp11 = icmp eq i32 %arg, 16 + %tmp12 = or i1 %tmp10, %tmp11 + %tmp13 = icmp eq i32 %arg, 17 + %tmp14 = or i1 %tmp12, %tmp13 + %tmp15 = icmp eq i32 %arg, 18 + %tmp16 = or i1 %tmp14, %tmp15 + %tmp17 = icmp eq i32 %arg, 15 + %tmp18 = or i1 %tmp16, %tmp17 + br i1 %tmp18, label %bb19, label %bb20 + +bb19: ; preds = %bb8, %bb + tail call void @foo1() + br label %bb20 + +bb20: ; preds = %bb19, %bb8 + ret void + +; CHECK-LABEL: @test18( +; CHECK: %arg.off = add i32 %arg, -8 +; CHECK: icmp ult i32 %arg.off, 11 +} + +define void @PR26323(i1 %tobool23, i32 %tmp3) { +entry: + %tobool5 = icmp ne i32 %tmp3, 0 + %neg14 = and i32 %tmp3, -2 + %cmp17 = icmp ne i32 %neg14, -1 + %or.cond = and i1 %tobool5, %tobool23 + %or.cond1 = and i1 %cmp17, %or.cond + br i1 %or.cond1, label %if.end29, label %if.then27 + +if.then27: ; preds = %entry + call void @foo1() + unreachable + +if.end29: ; preds = %entry + ret void +} + +; CHECK-LABEL: define void @PR26323( +; CHECK: %tobool5 = icmp ne i32 %tmp3, 0 +; CHECK: %neg14 = and i32 %tmp3, -2 +; CHECK: %cmp17 = icmp ne i32 %neg14, -1 +; CHECK: %or.cond = and i1 %tobool5, %tobool23 +; CHECK: %or.cond1 = and i1 %cmp17, %or.cond +; CHECK: br i1 %or.cond1, label %if.end29, label %if.then27 + +; Form a switch when and'ing a negated power of two +; CHECK-LABEL: define void @test19 +; CHECK: switch i32 %arg, label %else [ +; CHECK: i32 32, label %if +; CHECK: i32 13, label %if +; CHECK: i32 12, label %if +define void @test19(i32 %arg) { + %and = and i32 %arg, -2 + %cmp1 = icmp eq i32 %and, 12 + %cmp2 = icmp eq i32 %arg, 32 + %pred = or i1 %cmp1, %cmp2 + br i1 %pred, label %if, label %else + +if: + call void @foo1() + ret void + +else: + ret void +} + +; Since %cmp1 is always false, a switch is never formed +; CHECK-LABEL: define void @test20 +; CHECK-NOT: switch +; CHECK: ret void +define void @test20(i32 %arg) { + %and = and i32 %arg, -2 + %cmp1 = icmp eq i32 %and, 13 + %cmp2 = icmp eq i32 %arg, 32 + %pred = or i1 %cmp1, %cmp2 + br i1 %pred, label %if, label %else + +if: + call void @foo1() + ret void + +else: + ret void +} + +; Form a switch when or'ing a power of two +; CHECK-LABEL: define void @test21 +; CHECK: i32 32, label %else +; CHECK: i32 13, label %else +; CHECK: i32 12, label %else +define void @test21(i32 %arg) { + %and = or i32 %arg, 1 + %cmp1 = icmp ne i32 %and, 13 + %cmp2 = icmp ne i32 %arg, 32 + %pred = and i1 %cmp1, %cmp2 + br i1 %pred, label %if, label %else + +if: + call void @foo1() + ret void + +else: + ret void +} + +; Since %cmp1 is always false, a switch is never formed +; CHECK-LABEL: define void @test22 +; CHECK-NOT: switch +; CHECK: ret void +define void @test22(i32 %arg) { + %and = or i32 %arg, 1 + %cmp1 = icmp ne i32 %and, 12 + %cmp2 = icmp ne i32 %arg, 32 + %pred = and i1 %cmp1, %cmp2 + 
br i1 %pred, label %if, label %else + +if: + call void @foo1() + ret void + +else: + ret void +} \ No newline at end of file