Index: llvm/include/llvm/Analysis/ConstantFolding.h
===================================================================
--- llvm/include/llvm/Analysis/ConstantFolding.h
+++ llvm/include/llvm/Analysis/ConstantFolding.h
@@ -34,6 +34,7 @@
 class Instruction;
 class TargetLibraryInfo;
 class Type;
+class Value;
 
 /// If this constant is a constant offset from a global, return the global and
 /// the constant. Because of constantexprs, this function is recursive.
@@ -170,6 +171,13 @@
 /// represented, return null.
 Constant *ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty);
 
+/// If C is a constant patterned array and all valid loaded results for the
+/// given alignment are equal to the same constant, return that constant.
+Constant *ConstantFoldLoadFromPatternedAggregate(Constant *C, Type *LoadTy,
+                                                 uint64_t LoadAlign,
+                                                 Value *PtrOp,
+                                                 const DataLayout &DL);
+
 /// canConstantFoldCallTo - Return true if its even possible to fold a call to
 /// the specified function.
 bool canConstantFoldCallTo(const CallBase *Call, const Function *F);
Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -767,6 +767,72 @@
   return nullptr;
 }
 
+// Return the minimum GEP stride: the greatest common divisor of the element
+// type sizes of the consecutive GEPs feeding PtrOp (cf. Bézout's identity).
+// The constantness of the indices and struct types is currently ignored.
+static uint64_t GetMinimumGEPStride(Value *PtrOp, const DataLayout &DL) {
+
+  uint64_t g;
+  if (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
+    auto Euc = [](uint64_t a, uint64_t b) {
+      if (a < b) {
+        uint64_t t = a;
+        a = b;
+        b = t;
+      }
+      while (b != 0) {
+        uint64_t r = a % b;
+        a = b;
+        b = r;
+      }
+      return a;
+    };
+    g = DL.getTypeStoreSize(GEP->getSourceElementType());
+    Value *V = GEP;
+    while (auto *GEP = dyn_cast<GEPOperator>(V)) {
+      g = Euc(g, DL.getTypeStoreSize(GEP->getResultElementType()));
+      V = GEP->getPointerOperand();
+    }
+    return g;
+  }
+
+  return 1;
+}
+
+Constant *llvm::ConstantFoldLoadFromPatternedAggregate(Constant *C,
+                                                       Type *LoadTy,
+                                                       uint64_t LoadAlign,
+                                                       Value *PtrOp,
+                                                       const DataLayout &DL) {
+
+  unsigned GVSize = DL.getTypeStoreSize(C->getType());
+
+  // Bail for large initializers in excess of 1K to avoid allocating
+  // too much memory.
+  if (!GVSize || 1024 < GVSize)
+    return nullptr;
+
+  unsigned LoadSize = LoadTy->getScalarSizeInBits() / 8;
+  const APInt Offset(DL.getTypeSizeInBits(C->getType()), 0);
+  Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, Offset, DL);
+
+  // Any possible offset is a multiple of the minimum GEP stride, and any
+  // valid offset is also a multiple of the load alignment, so checking only
+  // multiples of the larger of the two is enough to prove all results equal.
+  uint64_t Stride = GetMinimumGEPStride(PtrOp, DL);
+  Stride = Stride < LoadAlign ? LoadAlign : Stride;
+
+  for (uint64_t ByteOffset = Stride, E = GVSize - LoadSize; ByteOffset <= E;
+       ByteOffset += Stride)
+    if (Ca != ConstantFoldLoadFromConst(
+                  C, LoadTy,
+                  APInt(DL.getTypeSizeInBits(C->getType()), ByteOffset), DL))
+      return nullptr;
+
+  return Ca;
+}
+
 namespace {
 
 /// One of Op0/Op1 is a constant expression.
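Editorial aside, not part of the patch: the stride reasoning in GetMinimumGEPStride above amounts to taking the GCD of the byte sizes of the GEP element types and then clamping it from below by the load alignment. The standalone C++ sketch below illustrates that arithmetic only; minimumCheckStride, its vector-of-sizes input, and the worked example are hypothetical stand-ins for the GEPOperator/DataLayout queries the patch actually performs.

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <vector>

// ElemSizes holds the store sizes (in bytes) of the element types of the GEP
// chain feeding the load's pointer operand; LoadAlign is the load alignment.
uint64_t minimumCheckStride(const std::vector<uint64_t> &ElemSizes,
                            uint64_t LoadAlign) {
  uint64_t Stride = 1;
  if (!ElemSizes.empty()) {
    Stride = ElemSizes.front();
    for (uint64_t Size : ElemSizes)
      Stride = std::gcd(Stride, Size); // GCD of all reachable GEP strides.
  }
  // Every reachable offset is a multiple of both quantities, so probing
  // multiples of the larger one covers every offset the load can observe.
  return std::max(Stride, LoadAlign);
}

// Example: an i16 GEP into an [8 x i8] initializer with a 1-byte-aligned i32
// load gives minimumCheckStride({2}, 1) == 2, so only byte offsets 0, 2 and 4
// need to be compared against each other.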
Index: llvm/lib/Analysis/InstructionSimplify.cpp
===================================================================
--- llvm/lib/Analysis/InstructionSimplify.cpp
+++ llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6643,7 +6643,9 @@
     return C;
 
   // Try to convert operand into a constant by stripping offsets while looking
-  // through invariant.group intrinsics.
+  // through invariant.group intrinsics. Otherwise, if the load alignment is
+  // no larger than the global variable's alignment, try every possible offset
+  // and check whether all of the loaded results are the same constant.
   APInt Offset(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
   PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
       Q.DL, Offset, /* AllowNonInbounts */ true,
@@ -6652,7 +6654,10 @@
     // Index size may have changed due to address space casts.
     Offset = Offset.sextOrTrunc(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()));
     return ConstantFoldLoadFromConstPtr(GV, LI->getType(), Offset, Q.DL);
-  }
+  } else if (uint64_t LoadAlign = LI->getAlign().value();
+             LoadAlign <= GV->getAlign().valueOrOne().value())
+    return ConstantFoldLoadFromPatternedAggregate(
+        GV->getInitializer(), LI->getType(), LoadAlign, PtrOp, Q.DL);
 
   return nullptr;
 }
Index: llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll
===================================================================
--- llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll
+++ llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+; RUN: opt < %s -passes=instsimplify -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LE
+; RUN: opt < %s -passes=instsimplify -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BE
 
 @constzeroarray = internal constant [4 x i32] zeroinitializer
 @constarray = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4
-@conststruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4
+@constpackedstruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4
 
 define i32 @load_gep_const_zero_array(i64 %idx) {
 ; CHECK-LABEL: @load_gep_const_zero_array(
@@ -25,37 +25,9 @@
   ret i8 %load
 }
 
-
-define i32 @load_gep_const_patterned_array(i64 %idx) {
-; CHECK-LABEL: @load_gep_const_patterned_array(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @constarray, i64 0, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT:    ret i32 [[LOAD]]
-;
-  %gep = getelementptr inbounds [4 x i32], ptr @constarray, i64 0, i64 %idx
-  %load = load i32, ptr %gep
-  ret i32 %load
-}
-
-define i8 @load_i8_multi_gep_const_array(i64 %idx1, i64 %idx2) {
-; CHECK-LABEL: @load_i8_multi_gep_const_array(
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX1:%.*]]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2:%.*]]
-; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT:    ret i8 [[LOAD]]
-;
-  %gep1 = getelementptr inbounds i8, ptr @constarray, i64 %idx1
-  %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2
-  %load = load i8, ptr %gep
-  ret i8 %load
-}
-
-; TODO: this should be ret i8 1
 define i8 @gep_load_i8_align2(i64 %idx){
 ; CHECK-LABEL: @gep_load_i8_align2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 2
-; CHECK-NEXT:    ret i8 [[TMP2]]
+; CHECK-NEXT:    ret i8 1
 ;
   %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx
   %2 = load i8, ptr %1, align 2
@@ -74,26 +46,26 @@
   ret i8 %2
 }
 
-; TODO: this should be ret i8 65537 on the case for little endian
 define i32 @gep_i32_load_i32_align4(i64 %idx){
-; CHECK-LABEL: @gep_i32_load_i32_align4(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constarray, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; LE-LABEL: @gep_i32_load_i32_align4(
+; LE-NEXT:    ret i32 65537
+;
+; BE-LABEL: @gep_i32_load_i32_align4(
+; BE-NEXT:    ret i32 16777472
 ;
   %1 = getelementptr inbounds i32, ptr @constarray, i64 %idx
   %2 = load i32, ptr %1, align 4
   ret i32 %2
 }
 
-; TODO: this should be ret i8 65537 on the case for little endian
-define i32 @gep_i32_load_i32_align4_struct(i64 %idx){
-; CHECK-LABEL: @gep_i32_load_i32_align4_struct(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr @conststruct, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT:    ret i32 [[TMP2]]
+define i32 @gep_i32_load_i32_align4_packedstruct(i64 %idx){
+; LE-LABEL: @gep_i32_load_i32_align4_packedstruct(
+; LE-NEXT:    ret i32 65537
 ;
-  %1 = getelementptr inbounds i32, ptr @conststruct, i64 %idx
+; BE-LABEL: @gep_i32_load_i32_align4_packedstruct(
+; BE-NEXT:    ret i32 16777472
+;
+  %1 = getelementptr inbounds i32, ptr @constpackedstruct, i64 %idx
   %2 = load i32, ptr %1, align 4
   ret i32 %2
 }
@@ -111,22 +83,23 @@
 }
 
 ; can't be folded
-define i32 @gep_i8_load_i32_align1_struct(i64 %idx){
-; CHECK-LABEL: @gep_i8_load_i32_align1_struct(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr @conststruct, i64 [[IDX:%.*]]
+define i32 @gep_i8_load_i32_align1_packedstruct(i64 %idx){
+; CHECK-LABEL: @gep_i8_load_i32_align1_packedstruct(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constpackedstruct, i64 [[IDX:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
-  %1 = getelementptr inbounds i8, ptr @conststruct, i64 %idx
+  %1 = getelementptr inbounds i8, ptr @constpackedstruct, i64 %idx
   %2 = load i32, ptr %1, align 1
   ret i32 %2
 }
 
-; TODO: This could be folded but need to see GEP source types
+
 define i32 @gep_i16_load_i32_align1(i64 %idx){
-; CHECK-LABEL: @gep_i16_load_i32_align1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; LE-LABEL: @gep_i16_load_i32_align1(
+; LE-NEXT:    ret i32 65537
+;
+; BE-LABEL: @gep_i16_load_i32_align1(
+; BE-NEXT:    ret i32 16777472
 ;
   %1 = getelementptr inbounds i16, ptr @constarray, i64 %idx
   %2 = load i32, ptr %1, align 1