Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -215,6 +215,10 @@
     "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
     cl::desc("Allow combining of ScaledReg field in Address sinking."));
 
+static cl::opt<bool>
+    EnableSplitLargeStruct("cgp-split-large-struct", cl::Hidden, cl::init(true),
+                           cl::desc("Enable splitting large struct"));
+
 namespace {
 
 using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
@@ -260,6 +264,11 @@
     /// Keep track of sext chains based on their initial value.
     DenseMap<Value *, Instruction *> SeenChainsForSExt;
 
+    // Keep track of GEPs accessing the same large structs that are candidates
+    // to be split later.
+    DenseMap<Value *, SmallVector<std::pair<GetElementPtrInst *, int64_t>, 32>>
+        LargeStructGEPMap;
+
     /// Keep track of SExt promoted.
     ValueToSExts ValToSExtendedUses;
 
@@ -328,6 +337,7 @@
         SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
     bool splitBranchCondition(Function &F);
     bool simplifyOffsetableRelocate(Instruction &I);
+    bool splitLargeStruct();
   };
 
 } // end anonymous namespace
@@ -408,12 +418,23 @@
   // to help generate sane code for PHIs involving such edges.
   EverMadeChange |= SplitIndirectBrCriticalEdges(F);
 
+  // Test if the target supports r+i addressing mode.  If not, we don't need to
+  // split large structs.
+  if (EnableSplitLargeStruct) {
+    TargetLowering::AddrMode AddrMode;
+    AddrMode.BaseOffs = 101;
+    if (!TLI || !TLI->isLegalAddressingMode(*DL, AddrMode,
+                                    Type::getInt8Ty(F.getContext()), 0))
+      EnableSplitLargeStruct = false;
+  }
+
   bool MadeChange = true;
   while (MadeChange) {
     MadeChange = false;
     SeenChainsForSExt.clear();
     ValToSExtendedUses.clear();
     RemovedInsts.clear();
+    LargeStructGEPMap.clear();
     for (Function::iterator I = F.begin(); I != F.end(); ) {
       BasicBlock *BB = &*I++;
       bool ModifiedDTOnIteration = false;
@@ -425,6 +446,8 @@
     }
     if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
       MadeChange |= mergeSExts(F);
+    if (!LargeStructGEPMap.empty())
+      MadeChange |= splitLargeStruct();
 
     // Really free removed instructions during promotion.
     for (Instruction *I : RemovedInsts)
@@ -2524,22 +2547,26 @@
   /// The ongoing transaction where every action should be registered.
   TypePromotionTransaction &TPT;
 
+  // A GEP which accesses a struct with an offset too large to be folded into
+  // the addressing mode.
+  std::pair<GetElementPtrInst *, int64_t> &LargeStructGEP;
+
   /// This is set to true when we should not do profitability checks.
   /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
   bool IgnoreProfitability;
 
   AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
                         const TargetLowering &TLI,
-                        const TargetRegisterInfo &TRI,
-                        Type *AT, unsigned AS,
+                        const TargetRegisterInfo &TRI, Type *AT, unsigned AS,
                         Instruction *MI, ExtAddrMode &AM,
                         const SetOfInstrs &InsertedInsts,
                         InstrToOrigTy &PromotedInsts,
-                        TypePromotionTransaction &TPT)
+                        TypePromotionTransaction &TPT,
+                        std::pair<GetElementPtrInst *, int64_t> &LargeStructGEP)
       : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
         DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
         MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
-        PromotedInsts(PromotedInsts), TPT(TPT) {
+        PromotedInsts(PromotedInsts), TPT(TPT), LargeStructGEP(LargeStructGEP) {
     IgnoreProfitability = false;
   }
 
@@ -2551,20 +2578,19 @@
   /// optimizations.
   /// \p PromotedInsts maps the instructions to their type before promotion.
   /// \p The ongoing transaction where every action should be registered.
-  static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
-                           Instruction *MemoryInst,
-                           SmallVectorImpl<Instruction*> &AddrModeInsts,
-                           const TargetLowering &TLI,
-                           const TargetRegisterInfo &TRI,
-                           const SetOfInstrs &InsertedInsts,
-                           InstrToOrigTy &PromotedInsts,
-                           TypePromotionTransaction &TPT) {
+  static ExtAddrMode
+  Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
+        SmallVectorImpl<Instruction *> &AddrModeInsts,
+        const TargetLowering &TLI, const TargetRegisterInfo &TRI,
+        const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
+        TypePromotionTransaction &TPT,
+        std::pair<GetElementPtrInst *, int64_t> &LargeStructGEP) {
     ExtAddrMode Result;
 
-    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI,
-                                         AccessTy, AS,
+    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
                                          MemoryInst, Result, InsertedInsts,
-                                         PromotedInsts, TPT).matchAddr(V, 0);
+                                         PromotedInsts, TPT, LargeStructGEP)
+                       .matchAddr(V, 0);
     (void)Success; assert(Success && "Couldn't select *anything*?");
     return Result;
   }
@@ -3720,6 +3746,31 @@
         // Check to see if we can fold the base pointer in too.
         if (matchAddr(AddrInst->getOperand(0), Depth+1))
           return true;
+      } else {
+        // Record a GEP which accesses a struct with an offset too large to
+        // fit into the r+i addressing mode.  The GEP is a candidate to be
+        // split later.
+        if (EnableSplitLargeStruct && Depth == 0 && ConstantOffset > 0) {
+          // Simple and common case that only one instruction which is a GEP is
+          // used in calculating the address for the struct access.
+          Value *Base = AddrInst->getOperand(0);
+          PointerType *BaseTy = cast<PointerType>(Base->getType());
+          if (BaseTy->getElementType()->isStructTy()) {
+            Instruction *BaseI = dyn_cast<Instruction>(Base);
+            if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
+                (BaseI && !isa<CastInst>(BaseI) &&
+                 !isa<BinaryOperator>(BaseI) && !isa<GetElementPtrInst>(BaseI)))
+              if (GetElementPtrInst *GEP =
+                      dyn_cast<GetElementPtrInst>(AddrInst))
+                if ((BaseI && GEP->getParent() != BaseI->getParent()) ||
+                    (!BaseI &&
+                     GEP->getParent() !=
+                         &GEP->getParent()->getParent()->getEntryBlock()))
+                  // Make sure the base is not in the same basic block as the
+                  // GEP.  Otherwise, the split might be undone.
+                  LargeStructGEP = std::make_pair(GEP, ConstantOffset);
+          }
+        }
       }
       AddrMode.BaseOffs -= ConstantOffset;
       return false;
@@ -4130,12 +4181,12 @@
     // will tell us if the addressing mode for the memory operation will
     // *actually* cover the shared instruction.
     ExtAddrMode Result;
+    std::pair<GetElementPtrInst *, int64_t> LargeStructGEP(nullptr, 0);
     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
         TPT.getRestorationPoint();
-    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI,
-                                  AddressAccessTy, AS,
-                                  MemoryInst, Result, InsertedInsts,
-                                  PromotedInsts, TPT);
+    AddressingModeMatcher Matcher(
+        MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
+        InsertedInsts, PromotedInsts, TPT, LargeStructGEP);
     Matcher.IgnoreProfitability = true;
     bool Success = Matcher.matchAddr(Address, 0);
     (void)Success; assert(Success && "Couldn't select *anything*?");
@@ -4237,11 +4288,19 @@
     // the result may differ depending on what other uses our candidate
     // addressing instructions might have.
     AddrModeInsts.clear();
+    std::pair<GetElementPtrInst *, int64_t> LargeStructGEP(nullptr, 0);
     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
         V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
-        InsertedInsts, PromotedInsts, TPT);
-    NewAddrMode.OriginalValue = V;
+        InsertedInsts, PromotedInsts, TPT, LargeStructGEP);
+
+    if (LargeStructGEP.first &&
+        LargeStructGEP.first->getParent() != MemoryInst->getParent())
+      // Only collect GEPs that cannot be sunk unless the underlying struct
+      // is split.
+      LargeStructGEPMap[LargeStructGEP.first->getPointerOperand()].push_back(
+          LargeStructGEP);
 
+    NewAddrMode.OriginalValue = V;
     if (!AddrModes.addNewAddrMode(NewAddrMode))
       break;
   }
@@ -4750,6 +4809,147 @@
   return Changed;
 }
 
+static int
+compareGEPOffset(const std::pair<GetElementPtrInst *, int64_t> *LHS,
+                 const std::pair<GetElementPtrInst *, int64_t> *RHS) {
+  if (LHS->first == RHS->first)
+    return 0;
+  if (LHS->second != RHS->second)
+    return LHS->second > RHS->second ? 1 : -1;
+  return LHS->first > RHS->first ? 1 : -1;
+}
+
+// Splitting large structs so that the GEPs accessing the structs can sink to
+// the same block as their users.  For example, a large struct starting from
+// %base is split into two parts where the second part starts from %new_base.
+//
+// Before:
+// BB0:
+//   %base     =
+//
+// BB1:
+//   %gep0     = gep %base, off0
+//   %gep1     = gep %base, off1
+//   %gep2     = gep %base, off2
+//
+// BB2:
+//   %load1    = load %gep0
+//   %load2    = load %gep1
+//   %load3    = load %gep2
+//
+// After:
+// BB0:
+//   %base     =
+//   %new_base = gep %base, off0
+//
+// BB1:
+//   %new_gep0 = %new_base
+//   %new_gep1 = gep %new_base, off1 - off0
+//   %new_gep2 = gep %new_base, off2 - off0
+//
+// BB2:
+//   %load1    = load i32, i32* %new_gep0
+//   %load2    = load i32, i32* %new_gep1
+//   %load3    = load i32, i32* %new_gep2
+//
+// %gep1 and %gep2 can be sunk to BB2 now after the splitting.
+bool CodeGenPrepare::splitLargeStruct() {
+  bool Changed = false;
+  for (auto &Entry : LargeStructGEPMap) {
+    SmallVectorImpl<std::pair<GetElementPtrInst *, int64_t>> &LargeStructGEPs =
+        Entry.second;
+    // Sorting all the GEPs of the same struct based on the offsets.
+    array_pod_sort(LargeStructGEPs.begin(), LargeStructGEPs.end(),
+                   compareGEPOffset);
+    LargeStructGEPs.erase(
+        std::unique(LargeStructGEPs.begin(), LargeStructGEPs.end()),
+        LargeStructGEPs.end());
+    // Skip if all the GEPs have the same offsets.
+    if (LargeStructGEPs.front().second == LargeStructGEPs.back().second)
+      continue;
+    GetElementPtrInst *BaseGEP = LargeStructGEPs.begin()->first;
+    int64_t BaseOffset = LargeStructGEPs.begin()->second;
+    Value *NewBaseGEP = nullptr;
+    for (auto &LargeStructGEP : LargeStructGEPs) {
+      GetElementPtrInst *GEP = LargeStructGEP.first;
+      int64_t Offset = LargeStructGEP.second;
+      if (!GEP)
+        continue;
+      if (Offset != BaseOffset) {
+        TargetLowering::AddrMode AddrMode;
+        AddrMode.BaseOffs = Offset - BaseOffset;
+        if (!TLI->isLegalAddressingMode(*DL, AddrMode,
+                                        GEP->getResultElementType(),
+                                        GEP->getAddressSpace())) {
+          // We need to create a new base if the offset to the current base is
+          // too large to fit into the addressing mode. So, a very large struct
+          // may be split into several parts.
+          BaseGEP = GEP;
+          BaseOffset = Offset;
+          NewBaseGEP = nullptr;
+        }
+      }
+
+      // Generate a new GEP to replace the current one.
+      IRBuilder<> Builder(GEP);
+      Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+      Type *I8PtrTy =
+          Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace());
+      Type *I8Ty = Builder.getInt8Ty();
+
+      if (!NewBaseGEP) {
+        // Create a new base if we don't have one yet.  Find the insertion
+        // pointer for the new base first.
+        BasicBlock::iterator NewBaseInsertPt;
+        BasicBlock *NewBaseInsertBB;
+        if (Instruction *BaseI = dyn_cast<Instruction>(Entry.first)) {
+          // If the base of the struct is an instruction, the new base will be
+          // inserted close to it.
+          NewBaseInsertBB = BaseI->getParent();
+          if (isa<PHINode>(BaseI))
+            NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+          else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
+            NewBaseInsertBB =
+                SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
+            NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+          } else
+            NewBaseInsertPt = std::next(BaseI->getIterator());
+        } else {
+          // If the current base is an argument or global value, the new base
+          // will be inserted to the entry block.
+          NewBaseInsertBB = &BaseGEP->getParent()->getParent()->getEntryBlock();
+          NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+        }
+        IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
+        // Create a new base.
+        Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
+        NewBaseGEP = Entry.first;
+        if (NewBaseGEP->getType() != I8PtrTy)
+          NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
+        NewBaseGEP =
+            NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
+      }
+
+      Value *NewGEP = NewBaseGEP;
+      if (Offset == BaseOffset) {
+        if (GEP->getType() != I8PtrTy)
+          NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
+      } else {
+        // Calculate the new offset for the new GEP.
+        Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
+        NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
+
+        if (GEP->getType() != I8PtrTy)
+          NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
+      }
+      GEP->replaceAllUsesWith(NewGEP);
+      GEP->eraseFromParent();
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
 /// Return true, if an ext(load) can be formed from an extension in
 /// \p MovedExts.
 bool CodeGenPrepare::canFormExtLd(
Index: test/Transforms/CodeGenPrepare/AArch64/large-struct.ll
===================================================================
--- /dev/null
+++ test/Transforms/CodeGenPrepare/AArch64/large-struct.ll
@@ -0,0 +1,121 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+
+%struct_type = type { [10000 x i32], i32, i32 }
+
+define void @test1(%struct_type** %s, i32 %n) {
+; CHECK-LABEL: test1
+entry:
+  %struct = load %struct_type*, %struct_type** %s
+  br label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+; CHECK:     mov     w{{[0-9]+}}, #40000
+; CHECK-NOT: mov     w{{[0-9]+}}, #40004
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK:     str      w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+}
+
+define void @test2(%struct_type* %struct, i32 %n) {
+; CHECK-LABEL: test2
+entry:
+  %cmp = icmp eq %struct_type* %struct, null
+  br i1 %cmp, label %while_end, label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+; CHECK:     mov     w{{[0-9]+}}, #40000
+; CHECK-NOT: mov     w{{[0-9]+}}, #40004
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp1 = icmp slt i32 %phi, %n
+  br i1 %cmp1, label %while_body, label %while_end
+
+while_body:
+; CHECK:     str      w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+}
+
+define void @test3(%struct_type* %s1, %struct_type* %s2, i1 %cond, i32 %n) {
+; CHECK-LABEL: test3
+entry:
+  br i1 %cond, label %if_true, label %if_end
+
+if_true:
+  br label %if_end
+
+if_end:
+  %struct = phi %struct_type* [ %s1, %entry ], [ %s2, %if_true ]
+  %cmp = icmp eq %struct_type* %struct, null
+  br i1 %cmp, label %while_end, label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %if_end ], [ %i, %while_body ]
+; CHECK:     mov     w{{[0-9]+}}, #40000
+; CHECK-NOT: mov     w{{[0-9]+}}, #40004
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp1 = icmp slt i32 %phi, %n
+  br i1 %cmp1, label %while_body, label %while_end
+
+while_body:
+; CHECK:     str      w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+}
+
+declare %struct_type* @foo()
+
+define void @test4(i32 %n) personality i32 (...)* @__FrameHandler {
+; CHECK-LABEL: test4
+entry:
+  %struct = invoke %struct_type* @foo() to label %while_cond unwind label %cleanup
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+; CHECK:     mov     w{{[0-9]+}}, #40000
+; CHECK-NOT: mov     w{{[0-9]+}}, #40004
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK:     str      w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+
+cleanup:
+  landingpad { i8*, i32 } cleanup
+  unreachable
+}
+
+declare i32 @__FrameHandler(...)