Index: llvm/include/llvm/Transforms/InstCombine/InstCombiner.h =================================================================== --- llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -52,6 +52,8 @@ public: /// Maximum size of array considered when transforming. uint64_t MaxArraySizeForCombine = 0; + /// Maximum bitwidth of data considered when transforming. + uint64_t MaxDataSizeForCombine = 0; /// An IRBuilder that automatically inserts new instructions into the /// worklist. Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -11,7 +11,9 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CaptureTracking.h" @@ -20,10 +22,13 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" @@ -108,9 +113,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { - if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() || - GV->getValueType() != GEP->getSourceElementType() || - !GV->isConstant() || !GV->hasDefinitiveInitializer()) + if (LI->isVolatile() 
|| !GV->isConstant() || !GV->hasDefinitiveInitializer()) return nullptr; Constant *Init = GV->getInitializer(); @@ -118,44 +121,30 @@ return nullptr; uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); + uint64_t DataSize = DL.getTypeAllocSize(Init->getType()); + // Don't blow up on huge arrays. if (ArrayElementCount > MaxArraySizeForCombine) return nullptr; - - // There are many forms of this optimization we can handle, for now, just do - // the simple index into a single-dimensional array. - // - // Require: GEP GV, 0, i {{, constant indices}} - if (GEP->getNumOperands() < 3 || - !isa(GEP->getOperand(1)) || - !cast(GEP->getOperand(1))->isZero() || - isa(GEP->getOperand(2))) + if (DataSize > MaxDataSizeForCombine) return nullptr; - // Check that indices after the variable are constants and in-range for the - // type they index. Collect the indices. This is typically for arrays of - // structs. - SmallVector LaterIndices; + Type *LoadedTy = LI->getType(); + uint64_t LoadedTySize = DL.getTypeAllocSize(LoadedTy); + uint64_t PtrBitwidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace()); + Type *PtrIdxTy = DL.getIndexType(GEP->getType()); - Type *EltTy = Init->getType()->getArrayElementType(); - for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { - ConstantInt *Idx = dyn_cast(GEP->getOperand(i)); - if (!Idx) return nullptr; // Variable index. + MapVector VariableOffset; + APInt ConstantOffset(PtrBitwidth, 0); + GEP->collectOffset(GEP->getModule()->getDataLayout(), PtrBitwidth, + VariableOffset, ConstantOffset); - uint64_t IdxVal = Idx->getZExtValue(); - if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. 
+ // Do not fold when the GEP offset is fully constant (no variable index). + if (VariableOffset.size() == 0) + return nullptr; - if (StructType *STy = dyn_cast(EltTy)) - EltTy = STy->getElementType(IdxVal); - else if (ArrayType *ATy = dyn_cast(EltTy)) { - if (IdxVal >= ATy->getNumElements()) return nullptr; - EltTy = ATy->getElementType(); - } else { - return nullptr; // Unknown type. - } - - LaterIndices.push_back(IdxVal); - } + // There are many forms of this optimization we can handle. + // Fold: cmp(A[ax + by + ... + C], Rhs) <=> cmp(ax + by + ... + C, IndexRhs) enum { Overdefined = -3, Undefined = -2 }; @@ -185,18 +174,17 @@ // the array, this will fully represent all the comparison results. uint64_t MagicBitvector = 0; + Value *Idx = nullptr; + // Scan the array and see if one of our patterns matches. - Constant *CompareRHS = cast(ICI.getOperand(1)); - for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { - Constant *Elt = Init->getAggregateElement(i); - if (!Elt) return nullptr; - - // If this is indexing an array of structures, get the structure element. - if (!LaterIndices.empty()) { - Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices); - if (!Elt) - return nullptr; - } + Constant *ComparedRHS = cast(ICI.getOperand(1)); + // TODO: Make the step increment as large as possible, for performance. + for (uint64_t i = 0; i <= DataSize - LoadedTySize; ++i) { + APInt Offset(PtrBitwidth, i); + Constant *Elt = ConstantFoldLoadFromConstPtr(GV, LoadedTy, Offset, DL); + + if (!Elt) + return nullptr; // If the element is masked, handle it. if (AndCst) { @@ -207,7 +195,7 @@ // Find out if the comparison would be true or false for the i'th element. Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, - CompareRHS, DL, &TLI); + ComparedRHS, DL, &TLI); // If the result is undef for this element, ignore it. if (isa(C)) { // Extend range state machines to cover this element in case there is an @@ -279,27 +267,16 @@ // Now that we've scanned the entire array, emit our new comparison(s). 
We // order the state machines in complexity of the generated code. - Value *Idx = GEP->getOperand(2); - - // If the index is larger than the pointer offset size of the target, truncate - // the index down like the GEP would do implicitly. We don't have to do this - // for an inbounds GEP because the index can't be out of range. - if (!GEP->isInBounds()) { - Type *PtrIdxTy = DL.getIndexType(GEP->getType()); - unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth(); - if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize) - Idx = Builder.CreateTrunc(Idx, PtrIdxTy); - } - // If inbounds keyword is not present, Idx * ElementSize can overflow. + // If inbounds keyword is not present, Idx can overflow. // Let's assume that ElementSize is 2 and the wanted value is at offset 0. // Then, there are two possible values for Idx to match offset 0: // 0x00..00, 0x80..00. // Emitting 'icmp eq Idx, 0' isn't correct in this case because the // comparison is false if Idx was 0x80..00. // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx. - unsigned ElementSize = - DL.getTypeAllocSize(Init->getType()->getArrayElementType()); + // TODO: let ElementSize be the gcd(a,b,c,d,...) of the coefficients + unsigned ElementSize = 1; auto MaskIdx = [&](Value *Idx) { if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) { Value *Mask = ConstantInt::get(Idx->getType(), -1); @@ -309,10 +286,40 @@ return Idx; }; + auto GenerateIndexIfNull = [&](Value *CurIdx) { + if (CurIdx) + return CurIdx; + + Value *Idx = ConstantInt::get(PtrIdxTy->getContext(), ConstantOffset); + for (auto [Var, Coefficient] : VariableOffset) { + uint64_t VarBitWidth = Var->getType()->getScalarSizeInBits(); + uint64_t IdxBitWidth = Idx->getType()->getScalarSizeInBits(); + Type *WiderType = + VarBitWidth > IdxBitWidth ? 
Var->getType() : Idx->getType(); + + Var = Builder.CreateSExtOrTrunc(Var, WiderType); + Idx = Builder.CreateSExtOrTrunc(Idx, WiderType); + Value *Mul = Builder.CreateMul( + Var, + ConstantInt::get(WiderType, Coefficient.sextOrTrunc( + WiderType->getScalarSizeInBits()))); + Idx = Builder.CreateAdd(Idx, Mul); + } + + // If the index is larger than the pointer offset size of the target, + // truncate the index down like the GEP would do implicitly. We don't have + // to do this for an inbounds GEP because the index can't be out of range. + + if (Idx->getType()->getScalarSizeInBits() > PtrBitwidth) + Idx = Builder.CreateTrunc(Idx, PtrIdxTy); + + return Idx; + }; + // If the comparison is only true for one or two elements, emit direct // comparisons. if (SecondTrueElement != Overdefined) { - Idx = MaskIdx(Idx); + Idx = MaskIdx(GenerateIndexIfNull(Idx)); // None true -> false. if (FirstTrueElement == Undefined) return replaceInstUsesWith(ICI, Builder.getFalse()); @@ -333,7 +340,7 @@ // If the comparison is only false for one or two elements, emit direct // comparisons. if (SecondFalseElement != Overdefined) { - Idx = MaskIdx(Idx); + Idx = MaskIdx(GenerateIndexIfNull(Idx)); // None false -> true. if (FirstFalseElement == Undefined) return replaceInstUsesWith(ICI, Builder.getTrue()); @@ -355,7 +362,7 @@ // where it is true, emit the range check. if (TrueRangeEnd != Overdefined) { assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); - Idx = MaskIdx(Idx); + Idx = MaskIdx(GenerateIndexIfNull(Idx)); // Generate (i-FirstTrue) getType(), - TrueRangeEnd-FirstTrueElement+1); + Value *End = + ConstantInt::get(PtrIdxTy, TrueRangeEnd - FirstTrueElement + 1); return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); } // False range check. if (FalseRangeEnd != Overdefined) { assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); - Idx = MaskIdx(Idx); + Idx = MaskIdx(GenerateIndexIfNull(Idx)); // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). 
if (FirstFalseElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); Idx = Builder.CreateAdd(Idx, Offs); } - Value *End = ConstantInt::get(Idx->getType(), - FalseRangeEnd-FirstFalseElement); + Value *End = + ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement); return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); } @@ -392,13 +399,15 @@ // Look for an appropriate type: // - The type of Idx if the magic fits // - The smallest fitting legal type - if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) - Ty = Idx->getType(); + uint64_t TraversedElementCount = DataSize - LoadedTySize + 1; + if (TraversedElementCount <= PtrIdxTy->getIntegerBitWidth()) + Ty = PtrIdxTy; else - Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount); + Ty = + DL.getSmallestLegalIntType(Init->getContext(), TraversedElementCount); if (Ty) { - Idx = MaskIdx(Idx); + Idx = MaskIdx(GenerateIndexIfNull(Idx)); Value *V = Builder.CreateIntCast(Idx, Ty, false); V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V); Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -4297,6 +4297,7 @@ InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, ORE, BFI, PSI, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; + IC.MaxDataSizeForCombine = MaxArraySize * 8; if (!IC.run()) break; Index: llvm/test/Transforms/InstCombine/load-cmp.ll =================================================================== --- llvm/test/Transforms/InstCombine/load-cmp.ll +++ llvm/test/Transforms/InstCombine/load-cmp.ll @@ -25,8 +25,9 @@ define i1 @test1(i32 %X) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 9 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: 
[[DOTMASK:%.*]] = and i32 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[DOTMASK]], 9 +; CHECK-NEXT: ret i1 [[TMP1]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X %Q = load i16, ptr %P @@ -36,9 +37,9 @@ define i1 @test1_noinbounds(i32 %X) { ; CHECK-LABEL: @test1_noinbounds( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[DOTMASK:%.*]] = and i32 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[DOTMASK]], 9 +; CHECK-NEXT: ret i1 [[TMP1]] ; %P = getelementptr [10 x i16], ptr @G16, i32 0, i32 %X %Q = load i16, ptr %P @@ -48,9 +49,9 @@ define i1 @test1_noinbounds_i64(i64 %X) { ; CHECK-LABEL: @test1_noinbounds_i64( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[DOTMASK1:%.*]] = and i64 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[DOTMASK1]], 9 +; CHECK-NEXT: ret i1 [[TMP1]] ; %P = getelementptr [10 x i16], ptr @G16, i64 0, i64 %X %Q = load i16, ptr %P @@ -60,9 +61,9 @@ define i1 @test1_noinbounds_as1(i32 %x) { ; CHECK-LABEL: @test1_noinbounds_as1( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 32767 -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[DOTMASK1:%.*]] = and i32 [[X:%.*]], 32767 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[DOTMASK1]], 9 +; CHECK-NEXT: ret i1 [[TMP1]] ; %p = getelementptr [10 x i16], ptr addrspace(1) @G16_as1, i16 0, i32 %x %q = load i16, ptr addrspace(1) %p @@ -73,9 +74,9 @@ define i1 @test1_noinbounds_as2(i64 %x) { ; CHECK-LABEL: @test1_noinbounds_as2( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[DOTMASK1:%.*]] = and i64 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[DOTMASK1]], 9 
+; CHECK-NEXT: ret i1 [[TMP1]] ; %p = getelementptr [10 x i16], ptr addrspace(2) @G16_as2, i16 0, i64 %x %q = load i16, ptr addrspace(2) %p @@ -86,7 +87,10 @@ define i1 @test2(i32 %X) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 480341, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X @@ -97,7 +101,8 @@ define i1 @test3(i32 %X) { ; CHECK-LABEL: @test3( -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 1 +; CHECK-NEXT: [[DOTMASK:%.*]] = and i32 [[X:%.*]], 536870911 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[DOTMASK]], 1 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X @@ -109,9 +114,10 @@ define i1 @test4(i32 %X) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 933, [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1 -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 476177, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X @@ -122,10 +128,11 @@ define i1 @test4_i16(i16 %X) { ; CHECK-LABEL: @test4_i16( -; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 933, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 476177, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK-NEXT: ret i1 
[[R]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i16 %X @@ -136,9 +143,10 @@ define i1 @test5(i32 %X) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[X]], 7 -; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 14 +; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X @@ -149,8 +157,11 @@ define i1 @test6(i32 %X) { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 1095216660350, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP4]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X @@ -161,8 +172,11 @@ define i1 @test7(i32 %X) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -4 -; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[TMP1]], -3 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 1103806595201, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP4]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X @@ -173,8 +187,10 @@ define i1 @test8(i32 %X) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2 -; CHECK-NEXT: [[S:%.*]] = icmp eq i32 [[TMP1]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 502442, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and 
i32 [[TMP2]], 1 +; CHECK-NEXT: [[S:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: ret i1 [[S]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X @@ -193,8 +209,11 @@ define i1 @test9(i32 %X) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 1052673, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [4 x { i32, i32 } ], ptr @GA, i32 0, i32 %X, i32 1 @@ -266,7 +285,12 @@ define i1 @test10_struct_arr(i32 %x) { ; CHECK-LABEL: @test10_struct_arr( -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 72058693549555968, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP5]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i32 0, i32 %x, i32 2 @@ -277,8 +301,12 @@ define i1 @test10_struct_arr_noinbounds(i32 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455 -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 72058693549555968, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP5]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i32 %x, i32 2 @@ -289,7 +317,13 @@ define i1 @test10_struct_arr_i16(i16 %x) { ; 
CHECK-LABEL: @test10_struct_arr_i16( -; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 4294967280 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP3]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 72058693549555968, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP6]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i16 0, i16 %x, i32 2 @@ -300,8 +334,12 @@ define i1 @test10_struct_arr_i64(i64 %x) { ; CHECK-LABEL: @test10_struct_arr_i64( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967295 -; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 4294967280 +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP2]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 72058693549555968, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP5]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i64 0, i64 %x, i32 2 @@ -312,9 +350,13 @@ define i1 @test10_struct_arr_noinbounds_i16(i16 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds_i16( -; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 268435455 -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 4294967280 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP3]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 72058693549555968, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP6]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i16 %x, 
i32 2 @@ -325,8 +367,12 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds_i64( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455 -; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 4294967280 +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP2]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 72058693549555968, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP5]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i64 %x, i32 2 @@ -338,6 +384,7 @@ @CG = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4] +; TODO: Fold it globally. define i1 @cmp_load_constant_array0(i64 %x){ ; CHECK-LABEL: @cmp_load_constant_array0( ; CHECK-NEXT: entry: @@ -346,10 +393,8 @@ ; CHECK: case2: ; CHECK-NEXT: ret i1 false ; CHECK: case1: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32 -; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]] -; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4 -; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ult i32 [[ISOK]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[X]], 1073741822 +; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp eq i64 [[TMP0]], 0 ; CHECK-NEXT: ret i1 [[COND_INFERRED]] ; entry: @@ -374,11 +419,7 @@ ; CHECK: case2: ; CHECK-NEXT: ret i1 false ; CHECK: case1: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32 -; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]] -; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4 -; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ugt i32 [[ISOK]], 10 -; CHECK-NEXT: ret i1 [[COND_INFERRED]] +; CHECK-NEXT: ret i1 false ; entry: %cond = icmp ult i64 %x, 2 @@ -404,10 +445,11 @@ ; CHECK: case2: ; CHECK-NEXT: ret i1 false ; CHECK: case1: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32 -; CHECK-NEXT: 
[[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP0]] -; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4 -; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i32 [[ISOK]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[X]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4294967292 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 4312859105, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1 +; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ne i64 [[TMP3]], 0 ; CHECK-NEXT: ret i1 [[COND_INFERRED]] ; entry: @@ -451,4 +493,3 @@ %cond_inferred = icmp ult i32 %isOK, %y ret i1 %cond_inferred } -