Index: lib/Transforms/IPO/ArgumentPromotion.cpp
===================================================================
--- lib/Transforms/IPO/ArgumentPromotion.cpp
+++ lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -396,6 +396,41 @@
   return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
 }
 
+/// Return true if Idx is too large to be used as an extractvalue index;
+/// extractvalue cannot take indices greater than UINT_MAX.
+static bool IsLargeIndex(uint64_t Idx) { return UINT_MAX < Idx; }
+
+/// Return BPrefix minus its leading APrefix, which must be a prefix of BPrefix.
+static IndicesVector SubtractPrefix(const IndicesVector &APrefix,
+                                    const IndicesVector &BPrefix) {
+  assert(APrefix.size() <= BPrefix.size() &&
+         std::equal(APrefix.begin(), APrefix.end(), BPrefix.begin()) &&
+         "APrefix must be a prefix of BPrefix");
+  return IndicesVector(BPrefix.begin() + APrefix.size(), BPrefix.end());
+}
+
+/// Return true if all indices of BPrefix past its prefix APrefix are small.
+static bool HasSmallSuffix(const IndicesVector &APrefix,
+                           const IndicesVector &BPrefix) {
+  const IndicesVector Rest = SubtractPrefix(APrefix, BPrefix);
+  return std::none_of(Rest.begin(), Rest.end(), IsLargeIndex);
+}
+
+/// Overwrite Suffix with the indices of BPrefix past its prefix APrefix and
+/// return true; if any of them is large, leave Suffix empty and return false.
+static bool GetSmallSuffix(const IndicesVector &APrefix,
+                           const IndicesVector &BPrefix,
+                           std::vector<unsigned> &Suffix) {
+  Suffix.clear();
+  for (uint64_t Idx : SubtractPrefix(APrefix, BPrefix)) {
+    if (IsLargeIndex(Idx)) {
+      Suffix.clear();
+      return false;
+    }
+    Suffix.push_back(static_cast<unsigned>(Idx));
+  }
+  return true;
+}
 
 /// Checks if Indices, or a prefix of Indices, is in Set.
 static bool PrefixIn(const IndicesVector &Indices,
@@ -723,6 +758,30 @@
         OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
       }
 
+      // Create as few promoted arguments as possible. A promoted argument that
+      // is part of another promoted argument can be extracted from it instead.
+      ScalarizeTable ScalarToRemove;
+      for (auto &MI : ArgIndices) {
+        // Make sure we get the proper prefix in case this is a direct load.
+        IndicesVector P = MI.second.empty() ? IndicesVector(0) : MI.second;
+        for (auto &NI : ArgIndices) {
+          if (ScalarToRemove.count(NI) || ScalarToRemove.count(MI))
+            continue;
+          // Erase NI if MI is a prefix of it and we can create an extracted
+          // value from promoted MI.
+          if (MI != NI && IsPrefix(P, NI.second) &&
+              HasSmallSuffix(P, NI.second)) {
+            ScalarToRemove.insert(NI);
+          }
+        }
+      }
+
+      // Erase the subcomponent scalars.
+      for (auto &EI : ScalarToRemove) {
+        OriginalLoads.erase(std::make_pair(&*I, EI.second));
+        ArgIndices.erase(EI);
+      }
+
       // Add a parameter to the function for each element passed in.
       for (const auto &ArgIndex : ArgIndices) {
         // not allowed to dereference ->begin() if size() is 0
@@ -982,6 +1041,7 @@
               << "' in function '" << F->getName() << "'\n");
       } else {
         GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
+        std::vector<unsigned> Suffix;
         IndicesVector Operands;
         Operands.reserve(GEP->getNumIndices());
         for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
@@ -995,6 +1055,10 @@
         Function::arg_iterator TheArg = I2;
         for (ScalarizeTable::iterator It = ArgIndices.begin();
              It->second != Operands; ++It, ++TheArg) {
+          IndicesVector P = It->second.empty() ? IndicesVector(0) : It->second;
+          // Found Operands' prefix.
+          if (IsPrefix(P, Operands) && GetSmallSuffix(P, Operands, Suffix))
+            break;
           assert(It != ArgIndices.end() && "GEP not handled??");
         }
 
@@ -1011,8 +1075,11 @@
         // All of the uses must be load instructions.  Replace them all with
         // the argument specified by ArgNo.
         while (!GEP->use_empty()) {
+          Value *V = &*TheArg;
+          if (!Suffix.empty())
+            V = ExtractValueInst::Create(V, Suffix, "", &*NF->begin()->begin());
           LoadInst *L = cast<LoadInst>(GEP->user_back());
-          L->replaceAllUsesWith(&*TheArg);
+          L->replaceAllUsesWith(V);
           L->eraseFromParent();
         }
         GEP->eraseFromParent();
Index: test/Transforms/ArgumentPromotion/aggregate-promote-overlapping.ll
===================================================================
--- /dev/null
+++ test/Transforms/ArgumentPromotion/aggregate-promote-overlapping.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s 
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+%struct.S = type { %struct.P, i32 } ; nested aggregate: field 0 is a %struct.P
+%struct.P = type { i32, i32 }
+%struct.X = type { i32, i32 } ; element type of the array in @promote_array
+
+declare void @take_p(%struct.P) readnone ; receives the whole-struct load
+declare void @take_x(%struct.X) readnone ; receives the whole-element load
+
+; CHECK-LABEL: define internal i32 @promote_struct(%struct.P
+; CHECK: extractvalue
+define internal i32 @promote_struct(%struct.S*) {
+  %2 = getelementptr inbounds %struct.S, %struct.S* %0, i32 0, i32 0
+  %3 = load %struct.P, %struct.P* %2, align 4 ; whole inner struct at {0, 0}
+  call void @take_p(%struct.P %3)
+  %4 = getelementptr inbounds %struct.S, %struct.S* %0, i32 0, i32 0, i32 0
+  %5 = load i32, i32* %4, align 4 ; field 0 of that same inner struct
+  ret i32 %5
+}
+
+define i32 @caller_struct(%struct.S*) { ; only call site of @promote_struct here
+  %2 = call i32 @promote_struct(%struct.S* %0)
+  ret i32 %2
+}
+
+; CHECK-LABEL: define internal i32 @promote_array(%struct.X
+; CHECK: extractvalue
+define internal i32 @promote_array([1024 x %struct.X]*) {
+  %2 = getelementptr inbounds [1024 x %struct.X], [1024 x %struct.X]* %0, i64 0, i64 1023
+  %3 = load %struct.X, %struct.X* %2, align 4 ; whole last element of the array
+  call void @take_x(%struct.X %3)
+  %4 = getelementptr inbounds [1024 x %struct.X], [1024 x %struct.X]* %0, i64 0, i64 1023, i32 0
+  %5 = load i32, i32* %4, align 4 ; field 0 of that same element
+  ret i32 %5
+}
+
+define i32 @caller_array([1024 x %struct.X]*) { ; only call site of @promote_array
+  %2 = call i32 @promote_array([1024 x %struct.X]* %0)
+  ret i32 %2
+}